@@ -41,5 +41,6 @@ coverage.out
 /dist
 /custom
 /data
+/indexers
 /log
 /public/img/avatar
@@ -158,6 +158,10 @@ SSL_MODE = disable
 ; For "sqlite3" and "tidb", use absolute path when you start as service
 PATH = data/gitea.db
 
+[indexer]
+ISSUE_INDEXER_PATH = indexers/issues.bleve
+UPDATE_BUFFER_LEN = 20
+
 [admin]
 
 [security]
@@ -17,6 +17,7 @@ import (
 	"code.gitea.io/gitea/modules/base"
 	"code.gitea.io/gitea/modules/log"
 	"code.gitea.io/gitea/modules/setting"
+	"code.gitea.io/gitea/modules/util"
 )
 
 var (
@@ -451,8 +452,11 @@ func (issue *Issue) ReadBy(userID int64) error {
 }
 
 func updateIssueCols(e Engine, issue *Issue, cols ...string) error {
-	_, err := e.Id(issue.ID).Cols(cols...).Update(issue)
-	return err
+	if _, err := e.Id(issue.ID).Cols(cols...).Update(issue); err != nil {
+		return err
+	}
+	UpdateIssueIndexer(issue)
+	return nil
 }
 
 // UpdateIssueCols only updates values of specific columns for given issue.
@@ -733,6 +737,8 @@ func newIssue(e *xorm.Session, opts NewIssueOptions) (err error) {
 		return err
 	}
 
+	UpdateIssueIndexer(opts.Issue)
+
 	if len(opts.Attachments) > 0 {
 		attachments, err := getAttachmentsByUUIDs(e, opts.Attachments)
 		if err != nil {
@@ -865,10 +871,11 @@ type IssuesOptions struct {
 	MilestoneID int64
 	RepoIDs     []int64
 	Page        int
-	IsClosed    bool
-	IsPull      bool
+	IsClosed    util.OptionalBool
+	IsPull      util.OptionalBool
 	Labels      string
 	SortType    string
+	IssueIDs    []int64
 }
 
 // sortIssuesSession sort an issues-related session based on the provided
@@ -894,11 +901,23 @@ func sortIssuesSession(sess *xorm.Session, sortType string) {
 }
 
 // Issues returns a list of issues by given conditions.
 func Issues(opts *IssuesOptions) ([]*Issue, error) {
-	if opts.Page <= 0 {
-		opts.Page = 1
+	var sess *xorm.Session
+	if opts.Page >= 0 {
+		var start int
+		if opts.Page == 0 {
+			start = 0
+		} else {
+			start = (opts.Page - 1) * setting.UI.IssuePagingNum
+		}
+		sess = x.Limit(setting.UI.IssuePagingNum, start)
+	} else {
+		sess = x.NewSession()
+		defer sess.Close()
 	}
-	sess := x.Limit(setting.UI.IssuePagingNum, (opts.Page-1)*setting.UI.IssuePagingNum)
+	if len(opts.IssueIDs) > 0 {
+		sess.In("issue.id", opts.IssueIDs)
+	}
 
 	if opts.RepoID > 0 {
 		sess.And("issue.repo_id=?", opts.RepoID)
@@ -906,7 +925,13 @@ func Issues(opts *IssuesOptions) ([]*Issue, error) {
 		// In case repository IDs are provided but actually no repository has issue.
 		sess.In("issue.repo_id", opts.RepoIDs)
 	}
 
-	sess.And("issue.is_closed=?", opts.IsClosed)
+	switch opts.IsClosed {
+	case util.OptionalBoolTrue:
+		sess.And("issue.is_closed=true")
+	case util.OptionalBoolFalse:
+		sess.And("issue.is_closed=false")
+	}
+
 	if opts.AssigneeID > 0 {
 		sess.And("issue.assignee_id=?", opts.AssigneeID)
@@ -926,7 +951,12 @@ func Issues(opts *IssuesOptions) ([]*Issue, error) {
 		sess.And("issue.milestone_id=?", opts.MilestoneID)
 	}
 
-	sess.And("issue.is_pull=?", opts.IsPull)
+	switch opts.IsPull {
+	case util.OptionalBoolTrue:
+		sess.And("issue.is_pull=true")
+	case util.OptionalBoolFalse:
+		sess.And("issue.is_pull=false")
+	}
 
 	sortIssuesSession(sess, opts.SortType)
@@ -1168,10 +1198,11 @@ type IssueStatsOptions struct {
 	MentionedID int64
 	PosterID    int64
 	IsPull      bool
+	IssueIDs    []int64
 }
 
 // GetIssueStats returns issue statistic information by given conditions.
-func GetIssueStats(opts *IssueStatsOptions) *IssueStats {
+func GetIssueStats(opts *IssueStatsOptions) (*IssueStats, error) {
 	stats := &IssueStats{}
 
 	countSession := func(opts *IssueStatsOptions) *xorm.Session {
@@ -1179,6 +1210,10 @@ func GetIssueStats(opts *IssueStatsOptions) *IssueStats {
 			Where("issue.repo_id = ?", opts.RepoID).
 			And("is_pull = ?", opts.IsPull)
 
+		if len(opts.IssueIDs) > 0 {
+			sess.In("issue.id", opts.IssueIDs)
+		}
+
 		if len(opts.Labels) > 0 && opts.Labels != "0" {
 			labelIDs, err := base.StringsToInt64s(strings.Split(opts.Labels, ","))
 			if err != nil {
@@ -1210,13 +1245,20 @@ func GetIssueStats(opts *IssueStatsOptions) *IssueStats {
 		return sess
 	}
 
-	stats.OpenCount, _ = countSession(opts).
+	var err error
+	stats.OpenCount, err = countSession(opts).
 		And("is_closed = ?", false).
 		Count(&Issue{})
-	stats.ClosedCount, _ = countSession(opts).
+	if err != nil {
+		return nil, err
+	}
+	stats.ClosedCount, err = countSession(opts).
 		And("is_closed = ?", true).
 		Count(&Issue{})
-	return stats
+	if err != nil {
+		return nil, err
+	}
+	return stats, nil
 }
 
 // GetUserIssueStats returns issue statistic information for dashboard by given conditions.
@@ -1294,7 +1336,11 @@ func GetRepoIssueStats(repoID, uid int64, filterMode int, isPull bool) (numOpen
 
 func updateIssue(e Engine, issue *Issue) error {
 	_, err := e.Id(issue.ID).AllCols().Update(issue)
-	return err
+	if err != nil {
+		return err
+	}
+	UpdateIssueIndexer(issue)
+	return nil
 }
 
 // UpdateIssue updates all fields of given issue.
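Taken together, the models/issue.go hunks above change the query API in two ways: paging becomes opt-out (a negative `Page` returns everything) and the `IsClosed`/`IsPull` filters become tri-state. A minimal sketch of a caller under the new semantics (hypothetical code, not part of the diff):

```go
package example

import (
	"code.gitea.io/gitea/models"
	"code.gitea.io/gitea/modules/util"
)

// listOpenItems returns every open issue and pull request of a repository.
// Page: -1 skips the LIMIT clause entirely, and OptionalBoolNone leaves the
// is_pull column out of the WHERE clause, which a plain bool could not express.
func listOpenItems(repoID int64) ([]*models.Issue, error) {
	return models.Issues(&models.IssuesOptions{
		RepoID:   repoID,
		Page:     -1,                     // no paging (see the Issues rewrite above)
		IsClosed: util.OptionalBoolFalse, // only open items
		IsPull:   util.OptionalBoolNone,  // both issues and pull requests
	})
}
```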
@@ -454,28 +454,20 @@ func UpdateComment(c *Comment) error {
 	return err
 }
 
-// DeleteCommentByID deletes the comment by given ID.
-func DeleteCommentByID(id int64) error {
-	comment, err := GetCommentByID(id)
-	if err != nil {
-		if IsErrCommentNotExist(err) {
-			return nil
-		}
-		return err
-	}
-
+// DeleteComment deletes the comment
+func DeleteComment(comment *Comment) error {
 	sess := x.NewSession()
 	defer sessionRelease(sess)
-	if err = sess.Begin(); err != nil {
+	if err := sess.Begin(); err != nil {
 		return err
 	}
 
-	if _, err = sess.Id(comment.ID).Delete(new(Comment)); err != nil {
+	if _, err := sess.Id(comment.ID).Delete(new(Comment)); err != nil {
 		return err
 	}
 
 	if comment.Type == CommentTypeComment {
-		if _, err = sess.Exec("UPDATE `issue` SET num_comments = num_comments - 1 WHERE id = ?", comment.IssueID); err != nil {
+		if _, err := sess.Exec("UPDATE `issue` SET num_comments = num_comments - 1 WHERE id = ?", comment.IssueID); err != nil {
 			return err
 		}
 	}
@@ -0,0 +1,183 @@
// Copyright 2017 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package models

import (
	"fmt"
	"os"
	"strconv"
	"strings"

	"code.gitea.io/gitea/modules/log"
	"code.gitea.io/gitea/modules/setting"
	"code.gitea.io/gitea/modules/util"

	"github.com/blevesearch/bleve"
	"github.com/blevesearch/bleve/analysis/analyzer/simple"
	"github.com/blevesearch/bleve/search/query"
)

// issueIndexerUpdateQueue queue of issues that need to be updated in the issues
// indexer
var issueIndexerUpdateQueue chan *Issue

// issueIndexer (thread-safe) index for searching issues
var issueIndexer bleve.Index

// issueIndexerData data stored in the issue indexer
type issueIndexerData struct {
	ID      int64
	RepoID  int64
	Title   string
	Content string
}

// numericQuery a numeric-equality query for the given value and field
func numericQuery(value int64, field string) *query.NumericRangeQuery {
	f := float64(value)
	tru := true
	q := bleve.NewNumericRangeInclusiveQuery(&f, &f, &tru, &tru)
	q.SetField(field)
	return q
}

// SearchIssuesByKeyword searches for issues by given conditions.
// Returns the matching issue IDs
func SearchIssuesByKeyword(repoID int64, keyword string) ([]int64, error) {
	fields := strings.Fields(strings.ToLower(keyword))
	indexerQuery := bleve.NewConjunctionQuery(
		numericQuery(repoID, "RepoID"),
		bleve.NewDisjunctionQuery(
			bleve.NewPhraseQuery(fields, "Title"),
			bleve.NewPhraseQuery(fields, "Content"),
		))
	search := bleve.NewSearchRequestOptions(indexerQuery, 2147483647, 0, false)
	search.Fields = []string{"ID"}

	result, err := issueIndexer.Search(search)
	if err != nil {
		return nil, err
	}

	issueIDs := make([]int64, len(result.Hits))
	for i, hit := range result.Hits {
		issueIDs[i] = int64(hit.Fields["ID"].(float64))
	}
	return issueIDs, nil
}

// InitIssueIndexer initialize issue indexer
func InitIssueIndexer() {
	_, err := os.Stat(setting.Indexer.IssuePath)
	if err != nil {
		if os.IsNotExist(err) {
			if err = createIssueIndexer(); err != nil {
				log.Fatal(4, "CreateIssuesIndexer: %v", err)
			}
			if err = populateIssueIndexer(); err != nil {
				log.Fatal(4, "PopulateIssuesIndex: %v", err)
			}
		} else {
			log.Fatal(4, "InitIssuesIndexer: %v", err)
		}
	} else {
		issueIndexer, err = bleve.Open(setting.Indexer.IssuePath)
		if err != nil {
			log.Fatal(4, "InitIssuesIndexer, open index: %v", err)
		}
	}
	issueIndexerUpdateQueue = make(chan *Issue, setting.Indexer.UpdateQueueLength)
	go processIssueIndexerUpdateQueue()
	// TODO close issueIndexer when Gitea closes
}

// createIssueIndexer create an issue indexer if one does not already exist
func createIssueIndexer() error {
	mapping := bleve.NewIndexMapping()
	docMapping := bleve.NewDocumentMapping()

	docMapping.AddFieldMappingsAt("ID", bleve.NewNumericFieldMapping())
	docMapping.AddFieldMappingsAt("RepoID", bleve.NewNumericFieldMapping())

	textFieldMapping := bleve.NewTextFieldMapping()
	textFieldMapping.Analyzer = simple.Name
	docMapping.AddFieldMappingsAt("Title", textFieldMapping)
	docMapping.AddFieldMappingsAt("Content", textFieldMapping)

	mapping.AddDocumentMapping("issues", docMapping)

	var err error
	issueIndexer, err = bleve.New(setting.Indexer.IssuePath, mapping)
	return err
}

// populateIssueIndexer populate the issue indexer with issue data
func populateIssueIndexer() error {
	for page := 1; ; page++ {
		repos, err := Repositories(&SearchRepoOptions{
			Page:     page,
			PageSize: 10,
		})
		if err != nil {
			return fmt.Errorf("Repositories: %v", err)
		}
		if len(repos) == 0 {
			return nil
		}
		batch := issueIndexer.NewBatch()
		for _, repo := range repos {
			issues, err := Issues(&IssuesOptions{
				RepoID:   repo.ID,
				IsClosed: util.OptionalBoolNone,
				IsPull:   util.OptionalBoolNone,
				Page:     -1, // do not page
			})
			if err != nil {
				return fmt.Errorf("Issues: %v", err)
			}
			for _, issue := range issues {
				err = batch.Index(issue.indexUID(), issue.issueData())
				if err != nil {
					return fmt.Errorf("batch.Index: %v", err)
				}
			}
		}
		if err = issueIndexer.Batch(batch); err != nil {
			return fmt.Errorf("index.Batch: %v", err)
		}
	}
}

func processIssueIndexerUpdateQueue() {
	for {
		select {
		case issue := <-issueIndexerUpdateQueue:
			if err := issueIndexer.Index(issue.indexUID(), issue.issueData()); err != nil {
				log.Error(4, "issuesIndexer.Index: %v", err)
			}
		}
	}
}

// indexUID a unique identifier for an issue used in full-text indices
func (issue *Issue) indexUID() string {
	return strconv.FormatInt(issue.ID, 36)
}

func (issue *Issue) issueData() *issueIndexerData {
	return &issueIndexerData{
		ID:      issue.ID,
		RepoID:  issue.RepoID,
		Title:   issue.Title,
		Content: issue.Content,
	}
}

// UpdateIssueIndexer add/update an issue to the issue indexer
func UpdateIssueIndexer(issue *Issue) {
	go func() {
		issueIndexerUpdateQueue <- issue
	}()
}
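The new file above wires everything together: writes go through `UpdateIssueIndexer` into a buffered channel drained by a single goroutine, and reads go through `SearchIssuesByKeyword`, which returns matching issue IDs for a follow-up database query. A sketch of the intended read path (hypothetical helper, not part of the diff; the real caller is the routers/repo/issue.go hunk further down):

```go
package example

import (
	"code.gitea.io/gitea/models"
	"code.gitea.io/gitea/modules/util"
)

// searchOpenIssues resolves a keyword to issue IDs via bleve, then loads the
// matching rows, mirroring how routers/repo/issue.go uses IssueIDs below.
func searchOpenIssues(repoID int64, keyword string) ([]*models.Issue, error) {
	ids, err := models.SearchIssuesByKeyword(repoID, keyword)
	if err != nil {
		return nil, err
	}
	if len(ids) == 0 {
		return []*models.Issue{}, nil // no hits: skip the database query
	}
	return models.Issues(&models.IssuesOptions{
		RepoID:   repoID,
		Page:     -1,
		IsClosed: util.OptionalBoolFalse,
		IsPull:   util.OptionalBoolFalse,
		IssueIDs: ids, // restrict the SQL query to the bleve hits
	})
}
```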
@@ -138,6 +138,10 @@ func LoadConfigs() {
 	}
 	DbCfg.SSLMode = sec.Key("SSL_MODE").String()
 	DbCfg.Path = sec.Key("PATH").MustString("data/gitea.db")
+
+	sec = setting.Cfg.Section("indexer")
+	setting.Indexer.IssuePath = sec.Key("ISSUE_INDEXER_PATH").MustString("indexers/issues.bleve")
+	setting.Indexer.UpdateQueueLength = sec.Key("UPDATE_BUFFER_LEN").MustInt(20)
 }
 
 // parsePostgreSQLHostPort parses given input in various forms defined in
@@ -0,0 +1,14 @@
// Copyright 2016 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package indexer

import (
	"code.gitea.io/gitea/models"
)

// NewContext start indexer service
func NewContext() {
	models.InitIssueIndexer()
}
@@ -123,6 +123,12 @@ var (
 	UsePostgreSQL bool
 	UseTiDB       bool
 
+	// Indexer settings
+	Indexer struct {
+		IssuePath         string
+		UpdateQueueLength int
+	}
+
 	// Webhook settings
 	Webhook = struct {
 		QueueLength    int
@@ -0,0 +1,25 @@
// Copyright 2017 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package util

// OptionalBool a boolean that can be "null"
type OptionalBool byte

const (
	// OptionalBoolNone a "null" boolean value
	OptionalBoolNone = iota
	// OptionalBoolTrue a "true" boolean value
	OptionalBoolTrue
	// OptionalBoolFalse a "false" boolean value
	OptionalBoolFalse
)

// OptionalBoolOf get the corresponding OptionalBool of a bool
func OptionalBoolOf(b bool) OptionalBool {
	if b {
		return OptionalBoolTrue
	}
	return OptionalBoolFalse
}
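`OptionalBool` is the small enabler for the filter changes above: a plain `bool` can only express "closed" or "open", while the tri-state adds "don't filter at all". A self-contained sketch of how it behaves:

```go
package main

import (
	"fmt"

	"code.gitea.io/gitea/modules/util"
)

// describe maps the tri-state back to a string; the None case is the one a
// plain bool cannot represent.
func describe(b util.OptionalBool) string {
	switch b {
	case util.OptionalBoolTrue:
		return "filter: true"
	case util.OptionalBoolFalse:
		return "filter: false"
	}
	return "no filter"
}

func main() {
	fmt.Println(describe(util.OptionalBoolOf(true)))  // filter: true
	fmt.Println(describe(util.OptionalBoolOf(false))) // filter: false
	fmt.Println(describe(util.OptionalBoolNone))      // no filter
}
```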
@@ -2926,6 +2926,10 @@ footer .ui.language .menu {
   width: 16px;
   text-align: center;
 }
+.navbar {
+  display: flex;
+  justify-content: space-between;
+}
 .ui.repository.list .item {
   padding-bottom: 25px;
 }
@@ -13,14 +13,16 @@ import (
 	"code.gitea.io/gitea/models"
 	"code.gitea.io/gitea/modules/context"
 	"code.gitea.io/gitea/modules/setting"
+	"code.gitea.io/gitea/modules/util"
 )
 
 // ListIssues list the issues of a repository
 func ListIssues(ctx *context.APIContext) {
+	isClosed := ctx.Query("state") == "closed"
 	issueOpts := models.IssuesOptions{
 		RepoID:   ctx.Repo.Repository.ID,
 		Page:     ctx.QueryInt("page"),
-		IsClosed: ctx.Query("state") == "closed",
+		IsClosed: util.OptionalBoolOf(isClosed),
 	}
 
 	issues, err := models.Issues(&issueOpts)
@@ -29,7 +31,7 @@ func ListIssues(ctx *context.APIContext) {
 		return
 	}
 
 	if ctx.Query("state") == "all" {
-		issueOpts.IsClosed = !issueOpts.IsClosed
+		issueOpts.IsClosed = util.OptionalBoolOf(!isClosed)
 		tempIssues, err := models.Issues(&issueOpts)
 		if err != nil {
 			ctx.Error(500, "Issues", err)
@@ -125,7 +125,7 @@ func DeleteIssueComment(ctx *context.APIContext) {
 		return
 	}
 
-	if err = models.DeleteCommentByID(comment.ID); err != nil {
+	if err = models.DeleteComment(comment); err != nil {
 		ctx.Error(500, "DeleteCommentByID", err)
 		return
 	}
@@ -18,6 +18,7 @@ import (
 	"code.gitea.io/gitea/modules/setting"
 	"code.gitea.io/gitea/modules/ssh"
 	macaron "gopkg.in/macaron.v1"
+	"code.gitea.io/gitea/modules/indexer"
 )
 
 func checkRunMode() {
@@ -36,6 +37,7 @@ func checkRunMode() {
 func NewServices() {
 	setting.NewServices()
 	mailer.NewContext()
+	indexer.NewContext()
 }
 
 // GlobalInit is for global configuration reload-able.
@@ -5,6 +5,7 @@
 package repo
 
 import (
+	"bytes"
 	"errors"
 	"fmt"
 	"io"
@@ -25,6 +26,7 @@ import (
 	"code.gitea.io/gitea/modules/markdown"
 	"code.gitea.io/gitea/modules/notification"
 	"code.gitea.io/gitea/modules/setting"
+	"code.gitea.io/gitea/modules/util"
 )
 
 const (
@@ -158,20 +160,39 @@ func Issues(ctx *context.Context) {
 	milestoneID := ctx.QueryInt64("milestone")
 	isShowClosed := ctx.Query("state") == "closed"
 
+	keyword := ctx.Query("q")
+	if bytes.Contains([]byte(keyword), []byte{0x00}) {
+		keyword = ""
+	}
+	var issueIDs []int64
+	var err error
+	if len(keyword) > 0 {
+		issueIDs, err = models.SearchIssuesByKeyword(repo.ID, keyword)
+		if len(issueIDs) == 0 {
+			forceEmpty = true
+		}
+	}
+
 	var issueStats *models.IssueStats
 	if forceEmpty {
 		issueStats = &models.IssueStats{}
 	} else {
-		issueStats = models.GetIssueStats(&models.IssueStatsOptions{
+		var err error
+		issueStats, err = models.GetIssueStats(&models.IssueStatsOptions{
 			RepoID:      repo.ID,
 			Labels:      selectLabels,
 			MilestoneID: milestoneID,
 			AssigneeID:  assigneeID,
 			MentionedID: mentionedID,
 			IsPull:      isPullList,
+			IssueIDs:    issueIDs,
 		})
+		if err != nil {
+			ctx.Error(500, "GetSearchIssueStats")
+			return
+		}
 	}
 
 	page := ctx.QueryInt("page")
 	if page <= 1 {
 		page = 1
@@ -190,7 +211,6 @@ func Issues(ctx *context.Context) {
 	if forceEmpty {
 		issues = []*models.Issue{}
 	} else {
-		var err error
 		issues, err = models.Issues(&models.IssuesOptions{
 			AssigneeID:  assigneeID,
 			RepoID:      repo.ID,
@@ -198,10 +218,11 @@ func Issues(ctx *context.Context) {
 			MentionedID: mentionedID,
 			MilestoneID: milestoneID,
 			Page:        pager.Current(),
-			IsClosed:    isShowClosed,
-			IsPull:      isPullList,
+			IsClosed:    util.OptionalBoolOf(isShowClosed),
+			IsPull:      util.OptionalBoolOf(isPullList),
 			Labels:      selectLabels,
 			SortType:    sortType,
+			IssueIDs:    issueIDs,
 		})
 		if err != nil {
 			ctx.Handle(500, "Issues", err)
@@ -258,6 +279,7 @@ func Issues(ctx *context.Context) {
 	ctx.Data["MilestoneID"] = milestoneID
 	ctx.Data["AssigneeID"] = assigneeID
 	ctx.Data["IsShowClosed"] = isShowClosed
+	ctx.Data["Keyword"] = keyword
 	if isShowClosed {
 		ctx.Data["State"] = "closed"
 	} else {
@@ -934,7 +956,7 @@ func DeleteComment(ctx *context.Context) {
 		return
 	}
 
-	if err = models.DeleteCommentByID(comment.ID); err != nil {
+	if err = models.DeleteComment(comment); err != nil {
 		ctx.Handle(500, "DeleteCommentByID", err)
 		return
 	}
@@ -15,6 +15,7 @@ import (
 	"code.gitea.io/gitea/modules/base"
 	"code.gitea.io/gitea/modules/context"
 	"code.gitea.io/gitea/modules/setting"
+	"code.gitea.io/gitea/modules/util"
 )
 
 const (
@@ -277,8 +278,8 @@ func Issues(ctx *context.Context) {
 		PosterID: posterID,
 		RepoIDs:  repoIDs,
 		Page:     page,
-		IsClosed: isShowClosed,
-		IsPull:   isPullList,
+		IsClosed: util.OptionalBoolOf(isShowClosed),
+		IsPull:   util.OptionalBoolOf(isPullList),
 		SortType: sortType,
 	})
 	if err != nil {
@@ -4,6 +4,7 @@
 	<div class="ui container">
 		<div class="navbar">
 			{{template "repo/issue/navbar" .}}
+			{{template "repo/issue/search" .}}
 			<div class="ui right">
 				{{if .PageIsIssueList}}
 					<a class="ui green button" href="{{.RepoLink}}/issues/new">{{.i18n.Tr "repo.issues.new"}}</a>
@@ -14,11 +15,11 @@
 		</div>
 		<div class="ui divider"></div>
 		<div class="ui tiny basic status buttons">
-			<a class="ui {{if not .IsShowClosed}}green active{{end}} basic button" href="{{$.Link}}?type={{$.ViewType}}&sort={{$.SortType}}&state=open&labels={{.SelectLabels}}&milestone={{.MilestoneID}}&assignee={{.AssigneeID}}">
+			<a class="ui {{if not .IsShowClosed}}green active{{end}} basic button" href="{{$.Link}}?q={{$.Keyword}}&type={{$.ViewType}}&sort={{$.SortType}}&state=open&labels={{.SelectLabels}}&milestone={{.MilestoneID}}&assignee={{.AssigneeID}}">
 				<i class="octicon octicon-issue-opened"></i>
 				{{.i18n.Tr "repo.issues.open_tab" .IssueStats.OpenCount}}
 			</a>
-			<a class="ui {{if .IsShowClosed}}red active{{end}} basic button" href="{{$.Link}}?type={{.ViewType}}&sort={{$.SortType}}&state=closed&labels={{.SelectLabels}}&milestone={{.MilestoneID}}&assignee={{.AssigneeID}}">
+			<a class="ui {{if .IsShowClosed}}red active{{end}} basic button" href="{{$.Link}}?q={{$.Keyword}}&type={{.ViewType}}&sort={{$.SortType}}&state=closed&labels={{.SelectLabels}}&milestone={{.MilestoneID}}&assignee={{.AssigneeID}}">
 				<i class="octicon octicon-issue-closed"></i>
 				{{.i18n.Tr "repo.issues.close_tab" .IssueStats.ClosedCount}}
 			</a>
@@ -31,9 +32,9 @@
 				<i class="dropdown icon"></i>
 			</span>
 			<div class="menu">
-				<a class="item" href="{{$.Link}}?type={{$.ViewType}}&sort={{$.SortType}}&state={{$.State}}&milestone={{$.MilestoneID}}&assignee={{$.AssigneeID}}">{{.i18n.Tr "repo.issues.filter_label_no_select"}}</a>
+				<a class="item" href="{{$.Link}}?q={{$.Keyword}}&type={{$.ViewType}}&sort={{$.SortType}}&state={{$.State}}&milestone={{$.MilestoneID}}&assignee={{$.AssigneeID}}">{{.i18n.Tr "repo.issues.filter_label_no_select"}}</a>
 				{{range .Labels}}
-				<a class="item" href="{{$.Link}}?type={{$.ViewType}}&sort={{$.SortType}}&state={{$.State}}&labels={{.ID}}&milestone={{$.MilestoneID}}&assignee={{$.AssigneeID}}"><span class="octicon {{if eq $.SelectLabels .ID}}octicon-check{{end}}"></span><span class="label color" style="background-color: {{.Color}}"></span> {{.Name}}</a>
+				<a class="item" href="{{$.Link}}?q={{$.Keyword}}&type={{$.ViewType}}&sort={{$.SortType}}&state={{$.State}}&labels={{.ID}}&milestone={{$.MilestoneID}}&assignee={{$.AssigneeID}}"><span class="octicon {{if eq $.SelectLabels .ID}}octicon-check{{end}}"></span><span class="label color" style="background-color: {{.Color}}"></span> {{.Name}}</a>
 				{{end}}
 			</div>
 		</div>
@@ -45,7 +46,7 @@
 				<i class="dropdown icon"></i>
 			</span>
 			<div class="menu">
-				<a class="item" href="{{$.Link}}?type={{$.ViewType}}&sort={{$.SortType}}&state={{$.State}}&labels={{.SelectLabels}}&assignee={{$.AssigneeID}}">{{.i18n.Tr "repo.issues.filter_milestone_no_select"}}</a>
+				<a class="item" href="{{$.Link}}?q={{$.Keyword}}&type={{$.ViewType}}&sort={{$.SortType}}&state={{$.State}}&labels={{.SelectLabels}}&assignee={{$.AssigneeID}}">{{.i18n.Tr "repo.issues.filter_milestone_no_select"}}</a>
 				{{range .Milestones}}
 				<a class="{{if eq $.MilestoneID .ID}}active selected{{end}} item" href="{{$.Link}}?type={{$.ViewType}}&sort={{$.SortType}}&state={{$.State}}&labels={{$.SelectLabels}}&milestone={{.ID}}&assignee={{$.AssigneeID}}">{{.Name}}</a>
 				{{end}}
@@ -59,7 +60,7 @@
 				<i class="dropdown icon"></i>
 			</span>
 			<div class="menu">
-				<a class="item" href="{{$.Link}}?type={{$.ViewType}}&sort={{$.SortType}}&state={{$.State}}&labels={{.SelectLabels}}&milestone={{$.MilestoneID}}">{{.i18n.Tr "repo.issues.filter_assginee_no_select"}}</a>
+				<a class="item" href="{{$.Link}}?q={{$.Keyword}}&type={{$.ViewType}}&sort={{$.SortType}}&state={{$.State}}&labels={{.SelectLabels}}&milestone={{$.MilestoneID}}">{{.i18n.Tr "repo.issues.filter_assginee_no_select"}}</a>
 				{{range .Assignees}}
 				<a class="{{if eq $.AssigneeID .ID}}active selected{{end}} item" href="{{$.Link}}?type={{$.ViewType}}&sort={{$.SortType}}&state={{$.State}}&labels={{$.SelectLabels}}&milestone={{$.MilestoneID}}&assignee={{.ID}}"><img src="{{.RelAvatarLink}}"> {{.Name}}</a>
 				{{end}}
@@ -73,10 +74,10 @@
 				<i class="dropdown icon"></i>
 			</span>
 			<div class="menu">
-				<a class="{{if eq .ViewType "all"}}active{{end}} item" href="{{$.Link}}?type=all&sort={{$.SortType}}&state={{$.State}}&labels={{.SelectLabels}}&milestone={{$.MilestoneID}}&assignee={{$.AssigneeID}}">{{.i18n.Tr "repo.issues.filter_type.all_issues"}}</a>
-				<a class="{{if eq .ViewType "assigned"}}active{{end}} item" href="{{$.Link}}?type=assigned&sort={{$.SortType}}&state={{$.State}}&labels={{.SelectLabels}}&milestone={{$.MilestoneID}}&assignee={{$.AssigneeID}}">{{.i18n.Tr "repo.issues.filter_type.assigned_to_you"}}</a>
-				<a class="{{if eq .ViewType "created_by"}}active{{end}} item" href="{{$.Link}}?type=created_by&sort={{$.SortType}}&state={{$.State}}&labels={{.SelectLabels}}&milestone={{$.MilestoneID}}&assignee={{$.AssigneeID}}">{{.i18n.Tr "repo.issues.filter_type.created_by_you"}}</a>
-				<a class="{{if eq .ViewType "mentioned"}}active{{end}} item" href="{{$.Link}}?type=mentioned&sort={{$.SortType}}&state={{$.State}}&labels={{.SelectLabels}}&milestone={{$.MilestoneID}}&assignee={{$.AssigneeID}}">{{.i18n.Tr "repo.issues.filter_type.mentioning_you"}}</a>
+				<a class="{{if eq .ViewType "all"}}active{{end}} item" href="{{$.Link}}?q={{$.Keyword}}&type=all&sort={{$.SortType}}&state={{$.State}}&labels={{.SelectLabels}}&milestone={{$.MilestoneID}}&assignee={{$.AssigneeID}}">{{.i18n.Tr "repo.issues.filter_type.all_issues"}}</a>
+				<a class="{{if eq .ViewType "assigned"}}active{{end}} item" href="{{$.Link}}?q={{$.Keyword}}&type=assigned&sort={{$.SortType}}&state={{$.State}}&labels={{.SelectLabels}}&milestone={{$.MilestoneID}}&assignee={{$.AssigneeID}}">{{.i18n.Tr "repo.issues.filter_type.assigned_to_you"}}</a>
+				<a class="{{if eq .ViewType "created_by"}}active{{end}} item" href="{{$.Link}}?q={{$.Keyword}}&type=created_by&sort={{$.SortType}}&state={{$.State}}&labels={{.SelectLabels}}&milestone={{$.MilestoneID}}&assignee={{$.AssigneeID}}">{{.i18n.Tr "repo.issues.filter_type.created_by_you"}}</a>
+				<a class="{{if eq .ViewType "mentioned"}}active{{end}} item" href="{{$.Link}}?q={{$.Keyword}}&type=mentioned&sort={{$.SortType}}&state={{$.State}}&labels={{.SelectLabels}}&milestone={{$.MilestoneID}}&assignee={{$.AssigneeID}}">{{.i18n.Tr "repo.issues.filter_type.mentioning_you"}}</a>
 			</div>
 		</div>
@@ -87,12 +88,12 @@
 				<i class="dropdown icon"></i>
 			</span>
 			<div class="menu">
-				<a class="{{if or (eq .SortType "latest") (not .SortType)}}active{{end}} item" href="{{$.Link}}?type={{$.ViewType}}&sort=latest&state={{$.State}}&labels={{.SelectLabels}}&milestone={{$.MilestoneID}}&assignee={{$.AssigneeID}}">{{.i18n.Tr "repo.issues.filter_sort.latest"}}</a>
-				<a class="{{if eq .SortType "oldest"}}active{{end}} item" href="{{$.Link}}?type={{$.ViewType}}&sort=oldest&state={{$.State}}&labels={{.SelectLabels}}&milestone={{$.MilestoneID}}&assignee={{$.AssigneeID}}">{{.i18n.Tr "repo.issues.filter_sort.oldest"}}</a>
-				<a class="{{if eq .SortType "recentupdate"}}active{{end}} item" href="{{$.Link}}?type={{$.ViewType}}&sort=recentupdate&state={{$.State}}&labels={{.SelectLabels}}&milestone={{$.MilestoneID}}&assignee={{$.AssigneeID}}">{{.i18n.Tr "repo.issues.filter_sort.recentupdate"}}</a>
-				<a class="{{if eq .SortType "leastupdate"}}active{{end}} item" href="{{$.Link}}?type={{$.ViewType}}&sort=leastupdate&state={{$.State}}&labels={{.SelectLabels}}&milestone={{$.MilestoneID}}&assignee={{$.AssigneeID}}">{{.i18n.Tr "repo.issues.filter_sort.leastupdate"}}</a>
-				<a class="{{if eq .SortType "mostcomment"}}active{{end}} item" href="{{$.Link}}?type={{$.ViewType}}&sort=mostcomment&state={{$.State}}&labels={{.SelectLabels}}&milestone={{$.MilestoneID}}&assignee={{$.AssigneeID}}">{{.i18n.Tr "repo.issues.filter_sort.mostcomment"}}</a>
-				<a class="{{if eq .SortType "leastcomment"}}active{{end}} item" href="{{$.Link}}?type={{$.ViewType}}&sort=leastcomment&state={{$.State}}&labels={{.SelectLabels}}&milestone={{$.MilestoneID}}&assignee={{$.AssigneeID}}">{{.i18n.Tr "repo.issues.filter_sort.leastcomment"}}</a>
+				<a class="{{if or (eq .SortType "latest") (not .SortType)}}active{{end}} item" href="{{$.Link}}?q={{$.Keyword}}&type={{$.ViewType}}&sort=latest&state={{$.State}}&labels={{.SelectLabels}}&milestone={{$.MilestoneID}}&assignee={{$.AssigneeID}}">{{.i18n.Tr "repo.issues.filter_sort.latest"}}</a>
+				<a class="{{if eq .SortType "oldest"}}active{{end}} item" href="{{$.Link}}?q={{$.Keyword}}&type={{$.ViewType}}&sort=oldest&state={{$.State}}&labels={{.SelectLabels}}&milestone={{$.MilestoneID}}&assignee={{$.AssigneeID}}">{{.i18n.Tr "repo.issues.filter_sort.oldest"}}</a>
+				<a class="{{if eq .SortType "recentupdate"}}active{{end}} item" href="{{$.Link}}?q={{$.Keyword}}&type={{$.ViewType}}&sort=recentupdate&state={{$.State}}&labels={{.SelectLabels}}&milestone={{$.MilestoneID}}&assignee={{$.AssigneeID}}">{{.i18n.Tr "repo.issues.filter_sort.recentupdate"}}</a>
+				<a class="{{if eq .SortType "leastupdate"}}active{{end}} item" href="{{$.Link}}?q={{$.Keyword}}&type={{$.ViewType}}&sort=leastupdate&state={{$.State}}&labels={{.SelectLabels}}&milestone={{$.MilestoneID}}&assignee={{$.AssigneeID}}">{{.i18n.Tr "repo.issues.filter_sort.leastupdate"}}</a>
+				<a class="{{if eq .SortType "mostcomment"}}active{{end}} item" href="{{$.Link}}?q={{$.Keyword}}&type={{$.ViewType}}&sort=mostcomment&state={{$.State}}&labels={{.SelectLabels}}&milestone={{$.MilestoneID}}&assignee={{$.AssigneeID}}">{{.i18n.Tr "repo.issues.filter_sort.mostcomment"}}</a>
+				<a class="{{if eq .SortType "leastcomment"}}active{{end}} item" href="{{$.Link}}?q={{$.Keyword}}&type={{$.ViewType}}&sort=leastcomment&state={{$.State}}&labels={{.SelectLabels}}&milestone={{$.MilestoneID}}&assignee={{$.AssigneeID}}">{{.i18n.Tr "repo.issues.filter_sort.leastcomment"}}</a>
 			</div>
 		</div>
 	</div>
@@ -105,7 +106,7 @@
 			<a class="title has-emoji" href="{{$.Link}}/{{.Index}}">{{.Title}}</a>
 			{{range .Labels}}
-			<a class="ui label" href="{{$.Link}}?type={{$.ViewType}}&state={{$.State}}&labels={{.ID}}&milestone={{$.MilestoneID}}&assignee={{$.AssigneeID}}" style="color: {{.ForegroundColor}}; background-color: {{.Color}}">{{.Name}}</a>
+			<a class="ui label" href="{{$.Link}}?q={{$.Keyword}}&type={{$.ViewType}}&state={{$.State}}&labels={{.ID}}&milestone={{$.MilestoneID}}&assignee={{$.AssigneeID}}" style="color: {{.ForegroundColor}}; background-color: {{.Color}}">{{.Name}}</a>
 			{{end}}
 			{{if .NumComments}}
@@ -115,7 +116,7 @@
 			<p class="desc">
 				{{$.i18n.Tr "repo.issues.opened_by" $timeStr .Poster.HomeLink .Poster.Name | Safe}}
 				{{if .Milestone}}
-				<a class="milestone" href="{{$.Link}}?type={{$.ViewType}}&state={{$.State}}&labels={{$.SelectLabels}}&milestone={{.Milestone.ID}}&assignee={{$.AssigneeID}}">
+				<a class="milestone" href="{{$.Link}}?q={{$.Keyword}}&type={{$.ViewType}}&state={{$.State}}&labels={{$.SelectLabels}}&milestone={{.Milestone.ID}}&assignee={{$.AssigneeID}}">
 					<span class="octicon octicon-milestone"></span> {{.Milestone.Name}}
 				</a>
 				{{end}}
@@ -132,17 +133,17 @@
 	{{if gt .TotalPages 1}}
 		<div class="center page buttons">
 			<div class="ui borderless pagination menu">
-				<a class="{{if not .HasPrevious}}disabled{{end}} item" {{if .HasPrevious}}href="{{$.Link}}?type={{$.ViewType}}&sort={{$.SortType}}&state={{$.State}}&labels={{$.SelectLabels}}&milestone={{$.MilestoneID}}&assignee={{$.AssigneeID}}&page={{.Previous}}"{{end}}>
+				<a class="{{if not .HasPrevious}}disabled{{end}} item" {{if .HasPrevious}}href="{{$.Link}}?q={{$.Keyword}}&type={{$.ViewType}}&sort={{$.SortType}}&state={{$.State}}&labels={{$.SelectLabels}}&milestone={{$.MilestoneID}}&assignee={{$.AssigneeID}}&page={{.Previous}}"{{end}}>
 					<i class="left arrow icon"></i> {{$.i18n.Tr "repo.issues.previous"}}
 				</a>
 				{{range .Pages}}
 					{{if eq .Num -1}}
 						<a class="disabled item">...</a>
 					{{else}}
-						<a class="{{if .IsCurrent}}active{{end}} item" {{if not .IsCurrent}}href="{{$.Link}}?type={{$.ViewType}}&sort={{$.SortType}}&state={{$.State}}&labels={{$.SelectLabels}}&milestone={{$.MilestoneID}}&assignee={{$.AssigneeID}}&page={{.Num}}"{{end}}>{{.Num}}</a>
+						<a class="{{if .IsCurrent}}active{{end}} item" {{if not .IsCurrent}}href="{{$.Link}}?q={{$.Keyword}}&type={{$.ViewType}}&sort={{$.SortType}}&state={{$.State}}&labels={{$.SelectLabels}}&milestone={{$.MilestoneID}}&assignee={{$.AssigneeID}}&page={{.Num}}"{{end}}>{{.Num}}</a>
 					{{end}}
 				{{end}}
-				<a class="{{if not .HasNext}}disabled{{end}} item" {{if .HasNext}}href="{{$.Link}}?type={{$.ViewType}}&sort={{$.SortType}}&state={{$.State}}&labels={{$.SelectLabels}}&milestone={{$.MilestoneID}}&assignee={{$.AssigneeID}}&page={{.Next}}"{{end}}>
+				<a class="{{if not .HasNext}}disabled{{end}} item" {{if .HasNext}}href="{{$.Link}}?q={{$.Keyword}}&type={{$.ViewType}}&sort={{$.SortType}}&state={{$.State}}&labels={{$.SelectLabels}}&milestone={{$.MilestoneID}}&assignee={{$.AssigneeID}}&page={{.Next}}"{{end}}>
 					{{$.i18n.Tr "repo.issues.next"}} <i class="icon right arrow"></i>
 				</a>
 			</div>
@@ -1,4 +1,4 @@
-<div class="ui compact small menu">
+<div class="ui compact left small menu">
 	<a class="{{if .PageIsLabels}}active{{end}} item" href="{{.RepoLink}}/labels">{{.i18n.Tr "repo.labels"}}</a>
 	<a class="{{if .PageIsMilestones}}active{{end}} item" href="{{.RepoLink}}/milestones">{{.i18n.Tr "repo.milestones"}}</a>
 </div>
@@ -0,0 +1,13 @@
<form class="ui form">
	<div class="ui fluid action input">
		<input type="hidden" name="type" value="{{$.ViewType}}"/>
		<input type="hidden" name="state" value="{{$.State}}"/>
		<input type="hidden" name="labels" value="{{.SelectLabels}}"/>
		<input type="hidden" name="milestone" value="{{$.MilestoneID}}"/>
		<input type="hidden" name="assignee" value="{{$.AssigneeID}}"/>
		<div class="ui search action input">
			<input name="q" value="{{.Keyword}}" placeholder="{{.i18n.Tr "explore.search"}}..." autofocus>
		</div>
		<button class="ui blue button" type="submit">{{.i18n.Tr "explore.search"}}</button>
	</div>
</form>
@@ -0,0 +1,16 @@
# Contributing to Bleve

We look forward to your contributions, but ask that you first review these guidelines.

### Sign the CLA

As Bleve is a Couchbase project we require contributors accept the [Couchbase Contributor License Agreement](http://review.couchbase.org/static/individual_agreement.html). To sign this agreement log into the Couchbase [code review tool](http://review.couchbase.org/). The Bleve project does not use this code review tool but it is still used to track acceptance of the contributor license agreements.

### Submitting a Pull Request

All types of contributions are welcome, but please keep the following in mind:

- If you're planning a large change, you should really discuss it in a github issue or on the google group first. This helps avoid duplicate effort and spending time on something that may not be merged.
- Existing tests should continue to pass, new tests for the contribution are nice to have.
- All code should have gone through `go fmt`
- All code should pass `go vet`
@@ -0,0 +1,202 @@
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
@@ -0,0 +1,62 @@
# ![bleve](docs/bleve.png) bleve

[![Build Status](https://travis-ci.org/blevesearch/bleve.svg?branch=master)](https://travis-ci.org/blevesearch/bleve) [![Coverage Status](https://coveralls.io/repos/blevesearch/bleve/badge.png?branch=master)](https://coveralls.io/r/blevesearch/bleve?branch=master) [![GoDoc](https://godoc.org/github.com/blevesearch/bleve?status.svg)](https://godoc.org/github.com/blevesearch/bleve)
[![Join the chat at https://gitter.im/blevesearch/bleve](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/blevesearch/bleve?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
[![codebeat](https://codebeat.co/badges/38a7cbc9-9cf5-41c0-a315-0746178230f4)](https://codebeat.co/projects/github-com-blevesearch-bleve)
[![Go Report Card](https://goreportcard.com/badge/blevesearch/bleve)](https://goreportcard.com/report/blevesearch/bleve)

modern text indexing in go - [blevesearch.com](http://www.blevesearch.com/)

Try out bleve live by [searching the bleve website](http://www.blevesearch.com/search/?q=bleve).

## Features

* Index any go data structure (including JSON)
* Intelligent defaults backed up by powerful configuration
* Supported field types:
    * Text, Numeric, Date
* Supported query types:
    * Term, Phrase, Match, Match Phrase, Prefix
    * Conjunction, Disjunction, Boolean
    * Numeric Range, Date Range
    * Simple query [syntax](http://www.blevesearch.com/docs/Query-String-Query/) for human entry
* tf-idf Scoring
* Search result match highlighting
* Supports Aggregating Facets:
    * Terms Facet
    * Numeric Range Facet
    * Date Range Facet

## Discussion

Discuss usage and development of bleve in the [google group](https://groups.google.com/forum/#!forum/bleve).

## Indexing

    message := struct{
        Id   string
        From string
        Body string
    }{
        Id:   "example",
        From: "marty.schoch@gmail.com",
        Body: "bleve indexing is easy",
    }

    mapping := bleve.NewIndexMapping()
    index, err := bleve.New("example.bleve", mapping)
    if err != nil {
        panic(err)
    }
    index.Index(message.Id, message)

## Querying

    index, _ := bleve.Open("example.bleve")
    query := bleve.NewQueryStringQuery("bleve")
    searchRequest := bleve.NewSearchRequest(query)
    searchResult, _ := index.Search(searchRequest)

## License

Apache License Version 2.0
@@ -0,0 +1,46 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// 		http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package simple

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/token/lowercase"
	"github.com/blevesearch/bleve/analysis/tokenizer/letter"
	"github.com/blevesearch/bleve/registry"
)

const Name = "simple"

func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	tokenizer, err := cache.TokenizerNamed(letter.Name)
	if err != nil {
		return nil, err
	}
	toLowerFilter, err := cache.TokenFilterNamed(lowercase.Name)
	if err != nil {
		return nil, err
	}
	rv := analysis.Analyzer{
		Tokenizer: tokenizer,
		TokenFilters: []analysis.TokenFilter{
			toLowerFilter,
		},
	}
	return &rv, nil
}

func init() {
	registry.RegisterAnalyzer(Name, AnalyzerConstructor)
}
@@ -0,0 +1,52 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// 		http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package standard

import (
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/lang/en"
	"github.com/blevesearch/bleve/analysis/token/lowercase"
	"github.com/blevesearch/bleve/analysis/tokenizer/unicode"
	"github.com/blevesearch/bleve/registry"
)

const Name = "standard"

func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	tokenizer, err := cache.TokenizerNamed(unicode.Name)
	if err != nil {
		return nil, err
	}
	toLowerFilter, err := cache.TokenFilterNamed(lowercase.Name)
	if err != nil {
		return nil, err
	}
	stopEnFilter, err := cache.TokenFilterNamed(en.StopName)
	if err != nil {
		return nil, err
	}
	rv := analysis.Analyzer{
		Tokenizer: tokenizer,
		TokenFilters: []analysis.TokenFilter{
			toLowerFilter,
			stopEnFilter,
		},
	}
	return &rv, nil
}

func init() {
	registry.RegisterAnalyzer(Name, AnalyzerConstructor)
}
@@ -0,0 +1,64 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// 		http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package flexible

import (
	"fmt"
	"time"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

const Name = "flexiblego"

type DateTimeParser struct {
	layouts []string
}

func New(layouts []string) *DateTimeParser {
	return &DateTimeParser{
		layouts: layouts,
	}
}

func (p *DateTimeParser) ParseDateTime(input string) (time.Time, error) {
	for _, layout := range p.layouts {
		rv, err := time.Parse(layout, input)
		if err == nil {
			return rv, nil
		}
	}
	return time.Time{}, analysis.ErrInvalidDateTime
}

func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) {
	layouts, ok := config["layouts"].([]interface{})
	if !ok {
		return nil, fmt.Errorf("must specify layouts")
	}
	var layoutStrs []string
	for _, layout := range layouts {
		layoutStr, ok := layout.(string)
		if ok {
			layoutStrs = append(layoutStrs, layoutStr)
		}
	}
	return New(layoutStrs), nil
}

func init() {
	registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
}
@@ -0,0 +1,45 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// 		http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package optional

import (
	"time"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/datetime/flexible"
	"github.com/blevesearch/bleve/registry"
)

const Name = "dateTimeOptional"

const rfc3339NoTimezone = "2006-01-02T15:04:05"
const rfc3339NoTimezoneNoT = "2006-01-02 15:04:05"
const rfc3339NoTime = "2006-01-02"

var layouts = []string{
	time.RFC3339Nano,
	time.RFC3339,
	rfc3339NoTimezone,
	rfc3339NoTimezoneNoT,
	rfc3339NoTime,
}

func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) {
	return flexible.New(layouts), nil
}

func init() {
	registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
}
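The "dateTimeOptional" parser is just the flexible parser preloaded with RFC3339-style layouts; `ParseDateTime` tries each layout in order and returns the first successful parse. A small standalone sketch (hypothetical driver code, assuming only the two files above) of that behavior:

```go
package main

import (
	"fmt"

	"github.com/blevesearch/bleve/analysis/datetime/flexible"
)

func main() {
	// Same idea as the layouts list above: most specific layouts first.
	p := flexible.New([]string{
		"2006-01-02T15:04:05", // RFC3339 without timezone
		"2006-01-02 15:04:05", // ... and without the "T"
		"2006-01-02",          // date only
	})
	for _, s := range []string{"2017-08-04 12:30:00", "2017-08-04", "not a date"} {
		t, err := p.ParseDateTime(s)
		fmt.Println(s, "=>", t, err) // err is analysis.ErrInvalidDateTime on failure
	}
}
```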
@@ -0,0 +1,111 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package analysis | |||
// TokenLocation represents one occurrence of a term at a particular location in | |||
// a field. Start, End and Position have the same meaning as in analysis.Token. | |||
// Field and ArrayPositions identify the field value in the source document. | |||
// See document.Field for details. | |||
type TokenLocation struct { | |||
Field string | |||
ArrayPositions []uint64 | |||
Start int | |||
End int | |||
Position int | |||
} | |||
// TokenFreq represents all the occurrences of a term in all fields of a | |||
// document. | |||
type TokenFreq struct { | |||
Term []byte | |||
Locations []*TokenLocation | |||
frequency int | |||
} | |||
func (tf *TokenFreq) Frequency() int { | |||
return tf.frequency | |||
} | |||
// TokenFrequencies maps document terms to their combined frequencies from all | |||
// fields. | |||
type TokenFrequencies map[string]*TokenFreq | |||
func (tfs TokenFrequencies) MergeAll(remoteField string, other TokenFrequencies) { | |||
// walk the new token frequencies | |||
for tfk, tf := range other { | |||
// set the remoteField value in incoming token freqs | |||
for _, l := range tf.Locations { | |||
l.Field = remoteField | |||
} | |||
existingTf, exists := tfs[tfk] | |||
if exists { | |||
existingTf.Locations = append(existingTf.Locations, tf.Locations...) | |||
existingTf.frequency = existingTf.frequency + tf.frequency | |||
} else { | |||
tfs[tfk] = &TokenFreq{ | |||
Term: tf.Term, | |||
frequency: tf.frequency, | |||
Locations: make([]*TokenLocation, len(tf.Locations)), | |||
} | |||
copy(tfs[tfk].Locations, tf.Locations) | |||
} | |||
} | |||
} | |||
func TokenFrequency(tokens TokenStream, arrayPositions []uint64, includeTermVectors bool) TokenFrequencies { | |||
rv := make(map[string]*TokenFreq, len(tokens)) | |||
if includeTermVectors { | |||
tls := make([]TokenLocation, len(tokens)) | |||
tlNext := 0 | |||
for _, token := range tokens { | |||
tls[tlNext] = TokenLocation{ | |||
ArrayPositions: arrayPositions, | |||
Start: token.Start, | |||
End: token.End, | |||
Position: token.Position, | |||
} | |||
curr, ok := rv[string(token.Term)] | |||
if ok { | |||
curr.Locations = append(curr.Locations, &tls[tlNext]) | |||
curr.frequency++ | |||
} else { | |||
rv[string(token.Term)] = &TokenFreq{ | |||
Term: token.Term, | |||
Locations: []*TokenLocation{&tls[tlNext]}, | |||
frequency: 1, | |||
} | |||
} | |||
tlNext++ | |||
} | |||
} else { | |||
for _, token := range tokens { | |||
curr, exists := rv[string(token.Term)] | |||
if exists { | |||
curr.frequency++ | |||
} else { | |||
rv[string(token.Term)] = &TokenFreq{ | |||
Term: token.Term, | |||
frequency: 1, | |||
} | |||
} | |||
} | |||
} | |||
return rv | |||
} |
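To make the shape of the result concrete, a small sketch of `TokenFrequency` over a hand-built stream (the terms are made up for illustration):

```go
package main

import (
	"fmt"

	"github.com/blevesearch/bleve/analysis"
)

func main() {
	tokens := analysis.TokenStream{
		&analysis.Token{Term: []byte("cheese"), Start: 0, End: 6, Position: 1},
		&analysis.Token{Term: []byte("crackers"), Start: 7, End: 15, Position: 2},
		&analysis.Token{Term: []byte("cheese"), Start: 16, End: 22, Position: 3},
	}

	// includeTermVectors=true records one TokenLocation per occurrence.
	freqs := analysis.TokenFrequency(tokens, nil, true)
	for term, tf := range freqs {
		fmt.Printf("%s: frequency=%d locations=%d\n", term, tf.Frequency(), len(tf.Locations))
	}
	// cheese: frequency=2 locations=2
	// crackers: frequency=1 locations=1
}
```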
@@ -0,0 +1,70 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
// Package en implements an analyzer with reasonable defaults for processing | |||
// English text. | |||
// | |||
// It strips possessive suffixes ('s), transforms tokens to lower case, | |||
// removes stopwords from a built-in list, and applies porter stemming. | |||
// | |||
// The built-in stopwords list is defined in EnglishStopWords. | |||
package en | |||
import ( | |||
"github.com/blevesearch/bleve/analysis" | |||
"github.com/blevesearch/bleve/registry" | |||
"github.com/blevesearch/bleve/analysis/token/lowercase" | |||
"github.com/blevesearch/bleve/analysis/token/porter" | |||
"github.com/blevesearch/bleve/analysis/tokenizer/unicode" | |||
) | |||
const AnalyzerName = "en" | |||
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) { | |||
tokenizer, err := cache.TokenizerNamed(unicode.Name) | |||
if err != nil { | |||
return nil, err | |||
} | |||
possEnFilter, err := cache.TokenFilterNamed(PossessiveName) | |||
if err != nil { | |||
return nil, err | |||
} | |||
toLowerFilter, err := cache.TokenFilterNamed(lowercase.Name) | |||
if err != nil { | |||
return nil, err | |||
} | |||
stopEnFilter, err := cache.TokenFilterNamed(StopName) | |||
if err != nil { | |||
return nil, err | |||
} | |||
stemmerEnFilter, err := cache.TokenFilterNamed(porter.Name) | |||
if err != nil { | |||
return nil, err | |||
} | |||
rv := analysis.Analyzer{ | |||
Tokenizer: tokenizer, | |||
TokenFilters: []analysis.TokenFilter{ | |||
possEnFilter, | |||
toLowerFilter, | |||
stopEnFilter, | |||
stemmerEnFilter, | |||
}, | |||
} | |||
return &rv, nil | |||
} | |||
func init() { | |||
registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor) | |||
} |
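The constructor above resolves every component by name through the registry cache. As an illustration only, the same pipeline can be assembled by hand from the pieces vendored in this change; this sketch assumes the `en` package sits at `analysis/lang/en`, matching this vendor layout:

```go
package main

import (
	"fmt"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/lang/en"
	"github.com/blevesearch/bleve/analysis/token/lowercase"
	"github.com/blevesearch/bleve/analysis/token/porter"
	"github.com/blevesearch/bleve/analysis/token/stop"
	"github.com/blevesearch/bleve/analysis/tokenizer/unicode"
)

func main() {
	// Build the stopword map from the embedded snowball list.
	stopMap := analysis.NewTokenMap()
	if err := stopMap.LoadBytes(en.EnglishStopWords); err != nil {
		panic(err)
	}

	// Same order as the constructor: possessive -> lowercase -> stop -> porter.
	a := analysis.Analyzer{
		Tokenizer: unicode.NewUnicodeTokenizer(),
		TokenFilters: []analysis.TokenFilter{
			en.NewPossessiveFilter(),
			lowercase.NewLowerCaseFilter(),
			stop.NewStopTokensFilter(stopMap),
			porter.NewPorterStemmer(),
		},
	}

	for _, tok := range a.Analyze([]byte("The programmer's indexes were working")) {
		fmt.Println(string(tok.Term)) // stemmed terms; stopwords dropped
	}
}
```

Filter order matters here: stopword removal runs before stemming, so the stemmer never sees words the stop list would have caught.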
@@ -0,0 +1,67 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package en | |||
import ( | |||
"unicode/utf8" | |||
"github.com/blevesearch/bleve/analysis" | |||
"github.com/blevesearch/bleve/registry" | |||
) | |||
// PossessiveName is the name PossessiveFilter is registered as | |||
// in the bleve registry. | |||
const PossessiveName = "possessive_en" | |||
const rightSingleQuotationMark = '’' | |||
const apostrophe = '\'' | |||
const fullWidthApostrophe = '＇' // U+FF07 fullwidth apostrophe | |||
const apostropheChars = rightSingleQuotationMark + apostrophe + fullWidthApostrophe | |||
// PossessiveFilter implements a TokenFilter which | |||
// strips the English possessive suffix ('s) from tokens. | |||
// It handles a variety of apostrophe types, is case-insensitive, | |||
// and doesn't distinguish between possessives and contractions | |||
// (e.g. "She's So Rad" becomes "She So Rad"). | |||
type PossessiveFilter struct { | |||
} | |||
func NewPossessiveFilter() *PossessiveFilter { | |||
return &PossessiveFilter{} | |||
} | |||
func (s *PossessiveFilter) Filter(input analysis.TokenStream) analysis.TokenStream { | |||
for _, token := range input { | |||
lastRune, lastRuneSize := utf8.DecodeLastRune(token.Term) | |||
if lastRune == 's' || lastRune == 'S' { | |||
nextLastRune, nextLastRuneSize := utf8.DecodeLastRune(token.Term[:len(token.Term)-lastRuneSize]) | |||
if nextLastRune == rightSingleQuotationMark || | |||
nextLastRune == apostrophe || | |||
nextLastRune == fullWidthApostrophe { | |||
token.Term = token.Term[:len(token.Term)-lastRuneSize-nextLastRuneSize] | |||
} | |||
} | |||
} | |||
return input | |||
} | |||
func PossessiveFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { | |||
return NewPossessiveFilter(), nil | |||
} | |||
func init() { | |||
registry.RegisterTokenFilter(PossessiveName, PossessiveFilterConstructor) | |||
} |
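A quick sketch of the filter in isolation (same `analysis/lang/en` path assumption as above); all three apostrophe variants are handled:

```go
package main

import (
	"fmt"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/lang/en"
)

func main() {
	f := en.NewPossessiveFilter()
	in := analysis.TokenStream{
		&analysis.Token{Term: []byte("marty's")},
		&analysis.Token{Term: []byte("couchbase’s")}, // right single quotation mark
	}
	for _, tok := range f.Filter(in) {
		fmt.Println(string(tok.Term)) // marty, couchbase
	}
}
```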
@@ -0,0 +1,33 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package en | |||
import ( | |||
"github.com/blevesearch/bleve/analysis" | |||
"github.com/blevesearch/bleve/analysis/token/stop" | |||
"github.com/blevesearch/bleve/registry" | |||
) | |||
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { | |||
tokenMap, err := cache.TokenMapNamed(StopName) | |||
if err != nil { | |||
return nil, err | |||
} | |||
return stop.NewStopTokensFilter(tokenMap), nil | |||
} | |||
func init() { | |||
registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) | |||
} |
@@ -0,0 +1,344 @@ | |||
package en | |||
import ( | |||
"github.com/blevesearch/bleve/analysis" | |||
"github.com/blevesearch/bleve/registry" | |||
) | |||
const StopName = "stop_en" | |||
// EnglishStopWords is the built-in list of stopwords used by the "stop_en" TokenFilter. | |||
// | |||
// This content was obtained from: | |||
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/ | |||
// The backtick (`) was changed to ' so the list can be embedded in a Go raw string literal. | |||
var EnglishStopWords = []byte(` | From svn.tartarus.org/snowball/trunk/website/algorithms/english/stop.txt | |||
| This file is distributed under the BSD License. | |||
| See http://snowball.tartarus.org/license.php | |||
| Also see http://www.opensource.org/licenses/bsd-license.html | |||
| - Encoding was converted to UTF-8. | |||
| - This notice was added. | |||
| | |||
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | |||
| An English stop word list. Comments begin with vertical bar. Each stop | |||
| word is at the start of a line. | |||
| Many of the forms below are quite rare (e.g. "yourselves") but included for | |||
| completeness. | |||
| PRONOUNS FORMS | |||
| 1st person sing | |||
i | subject, always in upper case of course | |||
me | object | |||
my | possessive adjective | |||
| the possessive pronoun 'mine' is best suppressed, because of the | |||
| sense of coal-mine etc. | |||
myself | reflexive | |||
| 1st person plural | |||
we | subject | |||
| us | object | |||
| care is required here because US = United States. It is usually | |||
| safe to remove it if it is in lower case. | |||
our | possessive adjective | |||
ours | possessive pronoun | |||
ourselves | reflexive | |||
| second person (archaic 'thou' forms not included) | |||
you | subject and object | |||
your | possessive adjective | |||
yours | possessive pronoun | |||
yourself | reflexive (singular) | |||
yourselves | reflexive (plural) | |||
| third person singular | |||
he | subject | |||
him | object | |||
his | possessive adjective and pronoun | |||
himself | reflexive | |||
she | subject | |||
her | object and possessive adjective | |||
hers | possessive pronoun | |||
herself | reflexive | |||
it | subject and object | |||
its | possessive adjective | |||
itself | reflexive | |||
| third person plural | |||
they | subject | |||
them | object | |||
their | possessive adjective | |||
theirs | possessive pronoun | |||
themselves | reflexive | |||
| other forms (demonstratives, interrogatives) | |||
what | |||
which | |||
who | |||
whom | |||
this | |||
that | |||
these | |||
those | |||
| VERB FORMS (using F.R. Palmer's nomenclature) | |||
| BE | |||
am | 1st person, present | |||
is | -s form (3rd person, present) | |||
are | present | |||
was | 1st person, past | |||
were | past | |||
be | infinitive | |||
been | past participle | |||
being | -ing form | |||
| HAVE | |||
have | simple | |||
has | -s form | |||
had | past | |||
having | -ing form | |||
| DO | |||
do | simple | |||
does | -s form | |||
did | past | |||
doing | -ing form | |||
| The forms below are, I believe, best omitted, because of the significant | |||
| homonym forms: | |||
| He made a WILL | |||
| old tin CAN | |||
| merry month of MAY | |||
| a smell of MUST | |||
| fight the good fight with all thy MIGHT | |||
| would, could, should, ought might however be included | |||
| | AUXILIARIES | |||
| | WILL | |||
|will | |||
would | |||
| | SHALL | |||
|shall | |||
should | |||
| | CAN | |||
|can | |||
could | |||
| | MAY | |||
|may | |||
|might | |||
| | MUST | |||
|must | |||
| | OUGHT | |||
ought | |||
| COMPOUND FORMS, increasingly encountered nowadays in 'formal' writing | |||
| pronoun + verb | |||
i'm | |||
you're | |||
he's | |||
she's | |||
it's | |||
we're | |||
they're | |||
i've | |||
you've | |||
we've | |||
they've | |||
i'd | |||
you'd | |||
he'd | |||
she'd | |||
we'd | |||
they'd | |||
i'll | |||
you'll | |||
he'll | |||
she'll | |||
we'll | |||
they'll | |||
| verb + negation | |||
isn't | |||
aren't | |||
wasn't | |||
weren't | |||
hasn't | |||
haven't | |||
hadn't | |||
doesn't | |||
don't | |||
didn't | |||
| auxiliary + negation | |||
won't | |||
wouldn't | |||
shan't | |||
shouldn't | |||
can't | |||
cannot | |||
couldn't | |||
mustn't | |||
| miscellaneous forms | |||
let's | |||
that's | |||
who's | |||
what's | |||
here's | |||
there's | |||
when's | |||
where's | |||
why's | |||
how's | |||
| rarer forms | |||
| daren't needn't | |||
| doubtful forms | |||
| oughtn't mightn't | |||
| ARTICLES | |||
a | |||
an | |||
the | |||
| THE REST (Overlap among prepositions, conjunctions, adverbs etc is so | |||
| high, that classification is pointless.) | |||
and | |||
but | |||
if | |||
or | |||
because | |||
as | |||
until | |||
while | |||
of | |||
at | |||
by | |||
for | |||
with | |||
about | |||
against | |||
between | |||
into | |||
through | |||
during | |||
before | |||
after | |||
above | |||
below | |||
to | |||
from | |||
up | |||
down | |||
in | |||
out | |||
on | |||
off | |||
over | |||
under | |||
again | |||
further | |||
then | |||
once | |||
here | |||
there | |||
when | |||
where | |||
why | |||
how | |||
all | |||
any | |||
both | |||
each | |||
few | |||
more | |||
most | |||
other | |||
some | |||
such | |||
no | |||
nor | |||
not | |||
only | |||
own | |||
same | |||
so | |||
than | |||
too | |||
very | |||
| Just for the record, the following words are among the commonest in English | |||
| one | |||
| every | |||
| least | |||
| less | |||
| many | |||
| now | |||
| ever | |||
| never | |||
| say | |||
| says | |||
| said | |||
| also | |||
| get | |||
| go | |||
| goes | |||
| just | |||
| made | |||
| make | |||
| put | |||
| see | |||
| seen | |||
| whether | |||
| like | |||
| well | |||
| back | |||
| even | |||
| still | |||
| way | |||
| take | |||
| since | |||
| another | |||
| however | |||
| two | |||
| three | |||
| four | |||
| five | |||
| first | |||
| second | |||
| new | |||
| old | |||
| high | |||
| long | |||
`) | |||
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) { | |||
rv := analysis.NewTokenMap() | |||
err := rv.LoadBytes(EnglishStopWords) | |||
return rv, err | |||
} | |||
func init() { | |||
registry.RegisterTokenMap(StopName, TokenMapConstructor) | |||
} |
@@ -0,0 +1,7 @@ | |||
# full line comment | |||
marty | |||
steve # trailing comment | |||
| different format of comment | |||
dustin | |||
siri | different style trailing comment | |||
multiple words with different whitespace |
@@ -0,0 +1,105 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
// Package lowercase implements a TokenFilter which converts | |||
// tokens to lower case according to Unicode rules. | |||
package lowercase | |||
import ( | |||
"bytes" | |||
"unicode" | |||
"unicode/utf8" | |||
"github.com/blevesearch/bleve/analysis" | |||
"github.com/blevesearch/bleve/registry" | |||
) | |||
// Name is the name used to register LowerCaseFilter in the bleve registry | |||
const Name = "to_lower" | |||
type LowerCaseFilter struct { | |||
} | |||
func NewLowerCaseFilter() *LowerCaseFilter { | |||
return &LowerCaseFilter{} | |||
} | |||
func (f *LowerCaseFilter) Filter(input analysis.TokenStream) analysis.TokenStream { | |||
for _, token := range input { | |||
token.Term = toLowerDeferredCopy(token.Term) | |||
} | |||
return input | |||
} | |||
func LowerCaseFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { | |||
return NewLowerCaseFilter(), nil | |||
} | |||
func init() { | |||
registry.RegisterTokenFilter(Name, LowerCaseFilterConstructor) | |||
} | |||
// toLowerDeferredCopy functions exactly like | |||
// bytes.ToLower() except that it will reuse (overwrite) | |||
// the original byte array when possible. | |||
// NOTE: because it's possible that the lower-case | |||
// form of a rune has a different utf-8 encoded | |||
// length, in those cases a new byte array is allocated. | |||
func toLowerDeferredCopy(s []byte) []byte { | |||
j := 0 | |||
for i := 0; i < len(s); { | |||
wid := 1 | |||
r := rune(s[i]) | |||
if r >= utf8.RuneSelf { | |||
r, wid = utf8.DecodeRune(s[i:]) | |||
} | |||
l := unicode.ToLower(r) | |||
// If the rune is already lowercased, just move to the | |||
// next rune. | |||
if l == r { | |||
i += wid | |||
j += wid | |||
continue | |||
} | |||
// Handles the Unicode special case where a Greek | |||
// capital sigma (Σ) at the end of a word must lowercase | |||
// to the final form 'ς' rather than 'σ'. | |||
if l == 'σ' && i+2 == len(s) { | |||
l = 'ς' | |||
} | |||
lwid := utf8.RuneLen(l) | |||
if lwid > wid { | |||
// utf-8 encoded replacement is wider | |||
// for now, punt and defer | |||
// to bytes.ToLower() for the remainder | |||
// only known to happen with chars | |||
// Rune Ⱥ(570) width 2 - Lower ⱥ(11365) width 3 | |||
// Rune Ⱦ(574) width 2 - Lower ⱦ(11366) width 3 | |||
rest := bytes.ToLower(s[i:]) | |||
rv := make([]byte, j+len(rest)) | |||
copy(rv[:j], s[:j]) | |||
copy(rv[j:], rest) | |||
return rv | |||
} else { | |||
utf8.EncodeRune(s[j:], l) | |||
} | |||
i += wid | |||
j += lwid | |||
} | |||
return s[:j] | |||
} |
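A short sketch of the filter, exercising the final-sigma special case handled above:

```go
package main

import (
	"fmt"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/token/lowercase"
)

func main() {
	f := lowercase.NewLowerCaseFilter()
	in := analysis.TokenStream{
		&analysis.Token{Term: []byte("HELLO")},
		&analysis.Token{Term: []byte("ΟΔΟΣ")}, // trailing Σ must become final sigma
	}
	for _, tok := range f.Filter(in) {
		fmt.Println(string(tok.Term)) // hello, οδος (ends in 'ς', not 'σ')
	}
}
```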
@@ -0,0 +1,53 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package porter | |||
import ( | |||
"bytes" | |||
"github.com/blevesearch/bleve/analysis" | |||
"github.com/blevesearch/bleve/registry" | |||
"github.com/blevesearch/go-porterstemmer" | |||
) | |||
const Name = "stemmer_porter" | |||
type PorterStemmer struct { | |||
} | |||
func NewPorterStemmer() *PorterStemmer { | |||
return &PorterStemmer{} | |||
} | |||
func (s *PorterStemmer) Filter(input analysis.TokenStream) analysis.TokenStream { | |||
for _, token := range input { | |||
// if it is not a protected keyword, stem it | |||
if !token.KeyWord { | |||
termRunes := bytes.Runes(token.Term) | |||
stemmedRunes := porterstemmer.StemWithoutLowerCasing(termRunes) | |||
token.Term = analysis.BuildTermFromRunes(stemmedRunes) | |||
} | |||
} | |||
return input | |||
} | |||
func PorterStemmerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { | |||
return NewPorterStemmer(), nil | |||
} | |||
func init() { | |||
registry.RegisterTokenFilter(Name, PorterStemmerConstructor) | |||
} |
@@ -0,0 +1,70 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
// Package stop implements a TokenFilter removing tokens found in | |||
// a TokenMap. | |||
// | |||
// Its constructor takes the following arguments: | |||
// | |||
// "stop_token_map" (string): the name of the token map identifying tokens to | |||
// remove. | |||
package stop | |||
import ( | |||
"fmt" | |||
"github.com/blevesearch/bleve/analysis" | |||
"github.com/blevesearch/bleve/registry" | |||
) | |||
const Name = "stop_tokens" | |||
type StopTokensFilter struct { | |||
stopTokens analysis.TokenMap | |||
} | |||
func NewStopTokensFilter(stopTokens analysis.TokenMap) *StopTokensFilter { | |||
return &StopTokensFilter{ | |||
stopTokens: stopTokens, | |||
} | |||
} | |||
func (f *StopTokensFilter) Filter(input analysis.TokenStream) analysis.TokenStream { | |||
j := 0 | |||
for _, token := range input { | |||
_, isStopToken := f.stopTokens[string(token.Term)] | |||
if !isStopToken { | |||
input[j] = token | |||
j++ | |||
} | |||
} | |||
return input[:j] | |||
} | |||
func StopTokensFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { | |||
stopTokenMapName, ok := config["stop_token_map"].(string) | |||
if !ok { | |||
return nil, fmt.Errorf("must specify stop_token_map") | |||
} | |||
stopTokenMap, err := cache.TokenMapNamed(stopTokenMapName) | |||
if err != nil { | |||
return nil, fmt.Errorf("error building stop words filter: %v", err) | |||
} | |||
return NewStopTokensFilter(stopTokenMap), nil | |||
} | |||
func init() { | |||
registry.RegisterTokenFilter(Name, StopTokensFilterConstructor) | |||
} |
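Outside the registry-driven constructor, the filter can be built directly from any `TokenMap`; a minimal sketch:

```go
package main

import (
	"fmt"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/token/stop"
)

func main() {
	stopMap := analysis.NewTokenMap()
	stopMap.AddToken("the")
	stopMap.AddToken("and")

	f := stop.NewStopTokensFilter(stopMap)
	in := analysis.TokenStream{
		&analysis.Token{Term: []byte("the")},
		&analysis.Token{Term: []byte("quick")},
		&analysis.Token{Term: []byte("and")},
		&analysis.Token{Term: []byte("brown")},
	}
	for _, tok := range f.Filter(in) {
		fmt.Println(string(tok.Term)) // quick, brown
	}
}
```

Note the filter compacts the input slice in place rather than allocating a new stream.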
@@ -0,0 +1,76 @@ | |||
// Copyright (c) 2016 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package character | |||
import ( | |||
"unicode/utf8" | |||
"github.com/blevesearch/bleve/analysis" | |||
) | |||
type IsTokenRune func(r rune) bool | |||
type CharacterTokenizer struct { | |||
isTokenRune IsTokenRune | |||
} | |||
func NewCharacterTokenizer(f IsTokenRune) *CharacterTokenizer { | |||
return &CharacterTokenizer{ | |||
isTokenRune: f, | |||
} | |||
} | |||
func (c *CharacterTokenizer) Tokenize(input []byte) analysis.TokenStream { | |||
rv := make(analysis.TokenStream, 0, 1024) | |||
offset := 0 | |||
start := 0 | |||
end := 0 | |||
count := 0 | |||
for currRune, size := utf8.DecodeRune(input[offset:]); currRune != utf8.RuneError; currRune, size = utf8.DecodeRune(input[offset:]) { | |||
isToken := c.isTokenRune(currRune) | |||
if isToken { | |||
end = offset + size | |||
} else { | |||
if end-start > 0 { | |||
// build token | |||
rv = append(rv, &analysis.Token{ | |||
Term: input[start:end], | |||
Start: start, | |||
End: end, | |||
Position: count + 1, | |||
Type: analysis.AlphaNumeric, | |||
}) | |||
count++ | |||
} | |||
start = offset + size | |||
end = start | |||
} | |||
offset += size | |||
} | |||
// if we ended in the middle of a token, finish it | |||
if end-start > 0 { | |||
// build token | |||
rv = append(rv, &analysis.Token{ | |||
Term: input[start:end], | |||
Start: start, | |||
End: end, | |||
Position: count + 1, | |||
Type: analysis.AlphaNumeric, | |||
}) | |||
} | |||
return rv | |||
} |
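A sketch of the tokenizer with `unicode.IsLetter` as the predicate (the same wiring the `letter` tokenizer below uses); tokens are maximal runs of runes for which the predicate holds:

```go
package main

import (
	"fmt"
	"unicode"

	"github.com/blevesearch/bleve/analysis/tokenizer/character"
)

func main() {
	t := character.NewCharacterTokenizer(unicode.IsLetter)
	for _, tok := range t.Tokenize([]byte("Hello, 世界 42 world")) {
		fmt.Printf("%q start=%d end=%d pos=%d\n", tok.Term, tok.Start, tok.End, tok.Position)
	}
	// "Hello", "世界" and "world" become tokens; "42" and punctuation are skipped.
}
```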
@@ -0,0 +1,33 @@ | |||
// Copyright (c) 2016 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package letter | |||
import ( | |||
"unicode" | |||
"github.com/blevesearch/bleve/analysis" | |||
"github.com/blevesearch/bleve/analysis/tokenizer/character" | |||
"github.com/blevesearch/bleve/registry" | |||
) | |||
const Name = "letter" | |||
func TokenizerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Tokenizer, error) { | |||
return character.NewCharacterTokenizer(unicode.IsLetter), nil | |||
} | |||
func init() { | |||
registry.RegisterTokenizer(Name, TokenizerConstructor) | |||
} |
@@ -0,0 +1,131 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package unicode | |||
import ( | |||
"github.com/blevesearch/segment" | |||
"github.com/blevesearch/bleve/analysis" | |||
"github.com/blevesearch/bleve/registry" | |||
) | |||
const Name = "unicode" | |||
type UnicodeTokenizer struct { | |||
} | |||
func NewUnicodeTokenizer() *UnicodeTokenizer { | |||
return &UnicodeTokenizer{} | |||
} | |||
func (rt *UnicodeTokenizer) Tokenize(input []byte) analysis.TokenStream { | |||
rvx := make([]analysis.TokenStream, 0, 10) // When rv gets full, append to rvx. | |||
rv := make(analysis.TokenStream, 0, 1) | |||
ta := []analysis.Token(nil) | |||
taNext := 0 | |||
segmenter := segment.NewWordSegmenterDirect(input) | |||
start := 0 | |||
pos := 1 | |||
guessRemaining := func(end int) int { // estimate segments left from the average segment length so far | |||
avgSegmentLen := end / (len(rv) + 1) | |||
if avgSegmentLen < 1 { | |||
avgSegmentLen = 1 | |||
} | |||
remainingLen := len(input) - end | |||
return remainingLen / avgSegmentLen | |||
} | |||
for segmenter.Segment() { | |||
segmentBytes := segmenter.Bytes() | |||
end := start + len(segmentBytes) | |||
if segmenter.Type() != segment.None { | |||
if taNext >= len(ta) { | |||
remainingSegments := guessRemaining(end) | |||
if remainingSegments > 1000 { | |||
remainingSegments = 1000 | |||
} | |||
if remainingSegments < 1 { | |||
remainingSegments = 1 | |||
} | |||
ta = make([]analysis.Token, remainingSegments) | |||
taNext = 0 | |||
} | |||
token := &ta[taNext] | |||
taNext++ | |||
token.Term = segmentBytes | |||
token.Start = start | |||
token.End = end | |||
token.Position = pos | |||
token.Type = convertType(segmenter.Type()) | |||
if len(rv) >= cap(rv) { // When rv is full, save it into rvx. | |||
rvx = append(rvx, rv) | |||
rvCap := cap(rv) * 2 | |||
if rvCap > 256 { | |||
rvCap = 256 | |||
} | |||
rv = make(analysis.TokenStream, 0, rvCap) // Next rv cap is bigger. | |||
} | |||
rv = append(rv, token) | |||
pos++ | |||
} | |||
start = end | |||
} | |||
if len(rvx) > 0 { | |||
n := len(rv) | |||
for _, r := range rvx { | |||
n += len(r) | |||
} | |||
rall := make(analysis.TokenStream, 0, n) | |||
for _, r := range rvx { | |||
rall = append(rall, r...) | |||
} | |||
return append(rall, rv...) | |||
} | |||
return rv | |||
} | |||
func UnicodeTokenizerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Tokenizer, error) { | |||
return NewUnicodeTokenizer(), nil | |||
} | |||
func init() { | |||
registry.RegisterTokenizer(Name, UnicodeTokenizerConstructor) | |||
} | |||
func convertType(segmentWordType int) analysis.TokenType { | |||
switch segmentWordType { | |||
case segment.Ideo: | |||
return analysis.Ideographic | |||
case segment.Kana: | |||
return analysis.Ideographic | |||
case segment.Number: | |||
return analysis.Numeric | |||
} | |||
return analysis.AlphaNumeric | |||
} |
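A small usage sketch; the segmenter follows Unicode word segmentation, and the `convertType` mapping above assigns distinct token types to numbers and ideographs:

```go
package main

import (
	"fmt"

	"github.com/blevesearch/bleve/analysis/tokenizer/unicode"
)

func main() {
	t := unicode.NewUnicodeTokenizer()
	for _, tok := range t.Tokenize([]byte("bleve 123 手紙")) {
		fmt.Printf("%q type=%d pos=%d\n", tok.Term, tok.Type, tok.Position)
	}
	// "bleve" is AlphaNumeric, "123" is Numeric, the ideographs are Ideographic.
}
```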
@@ -0,0 +1,76 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package analysis | |||
import ( | |||
"bufio" | |||
"bytes" | |||
"io" | |||
"io/ioutil" | |||
"strings" | |||
) | |||
type TokenMap map[string]bool | |||
func NewTokenMap() TokenMap { | |||
return make(TokenMap, 0) | |||
} | |||
// LoadFile reads in a list of tokens from a text file, | |||
// one per line. | |||
// Comments are supported using `#` or `|` | |||
func (t TokenMap) LoadFile(filename string) error { | |||
data, err := ioutil.ReadFile(filename) | |||
if err != nil { | |||
return err | |||
} | |||
return t.LoadBytes(data) | |||
} | |||
// LoadBytes reads in a list of tokens from memory, | |||
// one per line. | |||
// Comments are supported using `#` or `|` | |||
func (t TokenMap) LoadBytes(data []byte) error { | |||
bytesReader := bytes.NewReader(data) | |||
bufioReader := bufio.NewReader(bytesReader) | |||
line, err := bufioReader.ReadString('\n') | |||
for err == nil { | |||
t.LoadLine(line) | |||
line, err = bufioReader.ReadString('\n') | |||
} | |||
// if the err was EOF we still need to process the last value | |||
if err == io.EOF { | |||
t.LoadLine(line) | |||
return nil | |||
} | |||
return err | |||
} | |||
func (t TokenMap) LoadLine(line string) { | |||
// find the start of a comment, if any | |||
startComment := strings.IndexAny(line, "#|") | |||
if startComment >= 0 { | |||
line = line[:startComment] | |||
} | |||
tokens := strings.Fields(line) | |||
for _, token := range tokens { | |||
t.AddToken(token) | |||
} | |||
} | |||
func (t TokenMap) AddToken(token string) { | |||
t[token] = true | |||
} |
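A sketch of the comment handling: both `#` and `|` start comments, and a line may carry several whitespace-separated tokens:

```go
package main

import (
	"fmt"

	"github.com/blevesearch/bleve/analysis"
)

func main() {
	tm := analysis.NewTokenMap()
	err := tm.LoadBytes([]byte(`
# full line comment
marty
steve # trailing comment
dustin siri
`))
	if err != nil {
		panic(err)
	}
	fmt.Println(tm["marty"], tm["siri"], tm["comment"]) // true true false
}
```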
@@ -0,0 +1,103 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package analysis | |||
import ( | |||
"fmt" | |||
"time" | |||
) | |||
type CharFilter interface { | |||
Filter([]byte) []byte | |||
} | |||
type TokenType int | |||
const ( | |||
AlphaNumeric TokenType = iota | |||
Ideographic | |||
Numeric | |||
DateTime | |||
Shingle | |||
Single | |||
Double | |||
Boolean | |||
) | |||
// Token represents one occurrence of a term at a particular location in a | |||
// field. | |||
type Token struct { | |||
// Start specifies the byte offset of the beginning of the term in the | |||
// field. | |||
Start int `json:"start"` | |||
// End specifies the byte offset of the end of the term in the field. | |||
End int `json:"end"` | |||
Term []byte `json:"term"` | |||
// Position specifies the 1-based index of the token in the sequence of | |||
// occurrences of its term in the field. | |||
Position int `json:"position"` | |||
Type TokenType `json:"type"` | |||
KeyWord bool `json:"keyword"` | |||
} | |||
func (t *Token) String() string { | |||
return fmt.Sprintf("Start: %d End: %d Position: %d Token: %s Type: %d", t.Start, t.End, t.Position, string(t.Term), t.Type) | |||
} | |||
type TokenStream []*Token | |||
// A Tokenizer splits an input string into tokens, the usual behaviour being to | |||
// map words to tokens. | |||
type Tokenizer interface { | |||
Tokenize([]byte) TokenStream | |||
} | |||
// A TokenFilter adds, transforms or removes tokens from a token stream. | |||
type TokenFilter interface { | |||
Filter(TokenStream) TokenStream | |||
} | |||
type Analyzer struct { | |||
CharFilters []CharFilter | |||
Tokenizer Tokenizer | |||
TokenFilters []TokenFilter | |||
} | |||
func (a *Analyzer) Analyze(input []byte) TokenStream { | |||
if a.CharFilters != nil { | |||
for _, cf := range a.CharFilters { | |||
input = cf.Filter(input) | |||
} | |||
} | |||
tokens := a.Tokenizer.Tokenize(input) | |||
if a.TokenFilters != nil { | |||
for _, tf := range a.TokenFilters { | |||
tokens = tf.Filter(tokens) | |||
} | |||
} | |||
return tokens | |||
} | |||
var ErrInvalidDateTime = fmt.Errorf("unable to parse datetime with any of the layouts") | |||
type DateTimeParser interface { | |||
ParseDateTime(string) (time.Time, error) | |||
} | |||
type ByteArrayConverter interface { | |||
Convert([]byte) (interface{}, error) | |||
} |
@@ -0,0 +1,92 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package analysis | |||
import ( | |||
"bytes" | |||
"unicode/utf8" | |||
) | |||
func DeleteRune(in []rune, pos int) []rune { | |||
if pos >= len(in) { | |||
return in | |||
} | |||
copy(in[pos:], in[pos+1:]) | |||
return in[:len(in)-1] | |||
} | |||
func InsertRune(in []rune, pos int, r rune) []rune { | |||
// create a new slice 1 rune larger | |||
rv := make([]rune, len(in)+1) | |||
// copy the characters before the insert pos | |||
copy(rv[0:pos], in[0:pos]) | |||
// set the inserted rune | |||
rv[pos] = r | |||
// copy the characters after the insert pos | |||
copy(rv[pos+1:], in[pos:]) | |||
return rv | |||
} | |||
// BuildTermFromRunesOptimistic will build a term from the provided runes | |||
// AND optimistically attempt to encode into the provided buffer. | |||
// If at any point it appears the buffer is too small, a new buffer is | |||
// allocated and that is used instead. | |||
// This should be used in cases where the new term is frequently the same | |||
// length or shorter than the original term (in number of bytes). | |||
func BuildTermFromRunesOptimistic(buf []byte, runes []rune) []byte { | |||
rv := buf | |||
used := 0 | |||
for _, r := range runes { | |||
nextLen := utf8.RuneLen(r) | |||
if used+nextLen > len(rv) { | |||
// alloc new buf | |||
buf = make([]byte, len(runes)*utf8.UTFMax) | |||
// copy work we've already done | |||
copy(buf, rv[:used]) | |||
rv = buf | |||
} | |||
written := utf8.EncodeRune(rv[used:], r) | |||
used += written | |||
} | |||
return rv[:used] | |||
} | |||
func BuildTermFromRunes(runes []rune) []byte { | |||
return BuildTermFromRunesOptimistic(make([]byte, len(runes)*utf8.UTFMax), runes) | |||
} | |||
func TruncateRunes(input []byte, num int) []byte { | |||
runes := bytes.Runes(input) | |||
runes = runes[:len(runes)-num] | |||
out := BuildTermFromRunes(runes) | |||
return out | |||
} | |||
func RunesEndsWith(input []rune, suffix string) bool { | |||
inputLen := len(input) | |||
suffixRunes := []rune(suffix) | |||
suffixLen := len(suffixRunes) | |||
if suffixLen > inputLen { | |||
return false | |||
} | |||
for i := suffixLen - 1; i >= 0; i-- { | |||
if input[inputLen-(suffixLen-i)] != suffixRunes[i] { | |||
return false | |||
} | |||
} | |||
return true | |||
} |
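Short usage sketches for these helpers:

```go
package main

import (
	"fmt"

	"github.com/blevesearch/bleve/analysis"
)

func main() {
	// Reuses buf because the UTF-8 encoding fits; allocates otherwise.
	buf := make([]byte, 8)
	term := analysis.BuildTermFromRunesOptimistic(buf, []rune("héllo"))
	fmt.Println(string(term)) // héllo

	fmt.Println(analysis.RunesEndsWith([]rune("walking"), "ing"))     // true
	fmt.Println(string(analysis.TruncateRunes([]byte("walking"), 3))) // walk
}
```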
@@ -0,0 +1,88 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package bleve | |||
import ( | |||
"expvar" | |||
"io/ioutil" | |||
"log" | |||
"time" | |||
"github.com/blevesearch/bleve/index" | |||
"github.com/blevesearch/bleve/index/store/gtreap" | |||
"github.com/blevesearch/bleve/index/upsidedown" | |||
"github.com/blevesearch/bleve/registry" | |||
"github.com/blevesearch/bleve/search/highlight/highlighter/html" | |||
) | |||
var bleveExpVar = expvar.NewMap("bleve") | |||
type configuration struct { | |||
Cache *registry.Cache | |||
DefaultHighlighter string | |||
DefaultKVStore string | |||
DefaultMemKVStore string | |||
DefaultIndexType string | |||
SlowSearchLogThreshold time.Duration | |||
analysisQueue *index.AnalysisQueue | |||
} | |||
func (c *configuration) SetAnalysisQueueSize(n int) { | |||
c.analysisQueue = index.NewAnalysisQueue(n) | |||
} | |||
func newConfiguration() *configuration { | |||
return &configuration{ | |||
Cache: registry.NewCache(), | |||
analysisQueue: index.NewAnalysisQueue(4), | |||
} | |||
} | |||
// Config contains library level configuration | |||
var Config *configuration | |||
func init() { | |||
bootStart := time.Now() | |||
// build the default configuration | |||
Config = newConfiguration() | |||
// set the default highlighter | |||
Config.DefaultHighlighter = html.Name | |||
// default kv store | |||
Config.DefaultKVStore = "" | |||
// default mem only kv store | |||
Config.DefaultMemKVStore = gtreap.Name | |||
// default index | |||
Config.DefaultIndexType = upsidedown.Name | |||
bootDuration := time.Since(bootStart) | |||
bleveExpVar.Add("bootDuration", int64(bootDuration)) | |||
indexStats = NewIndexStats() | |||
bleveExpVar.Set("indexes", indexStats) | |||
initDisk() | |||
} | |||
var logger = log.New(ioutil.Discard, "bleve", log.LstdFlags) | |||
// SetLog sets the logger used for logging | |||
// by default log messages are sent to ioutil.Discard | |||
func SetLog(l *log.Logger) { | |||
logger = l | |||
} |
@@ -0,0 +1,23 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
// +build appengine appenginevm | |||
package bleve | |||
// in the appengine environment we cannot support disk based indexes | |||
// so we do no extra configuration in this method | |||
func initDisk() { | |||
} |
@@ -0,0 +1,25 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
// +build !appengine,!appenginevm | |||
package bleve | |||
import "github.com/blevesearch/bleve/index/store/boltdb" | |||
// in normal environments we configure boltdb as the default storage | |||
func initDisk() { | |||
// default kv store | |||
Config.DefaultKVStore = boltdb.Name | |||
} |
@@ -0,0 +1,38 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
/* | |||
Package bleve is a library for indexing and searching text. | |||
Example Opening New Index, Indexing Data | |||
message := struct { | |||
Id   string | |||
From string | |||
Body string | |||
}{ | |||
Id:   "example", | |||
From: "marty.schoch@gmail.com", | |||
Body: "bleve indexing is easy", | |||
} | |||
mapping := bleve.NewIndexMapping() | |||
index, _ := bleve.New("example.bleve", mapping) | |||
index.Index(message.Id, message) | |||
Example Opening Existing Index, Searching Data | |||
index, _ := bleve.Open("example.bleve") | |||
query := bleve.NewQueryStringQuery("bleve") | |||
searchRequest := bleve.NewSearchRequest(query) | |||
searchResult, _ := index.Search(searchRequest) | |||
*/ | |||
package bleve |
@@ -0,0 +1,75 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package document | |||
import "fmt" | |||
type Document struct { | |||
ID string `json:"id"` | |||
Fields []Field `json:"fields"` | |||
CompositeFields []*CompositeField | |||
Number uint64 `json:"-"` | |||
} | |||
func NewDocument(id string) *Document { | |||
return &Document{ | |||
ID: id, | |||
Fields: make([]Field, 0), | |||
CompositeFields: make([]*CompositeField, 0), | |||
} | |||
} | |||
func (d *Document) AddField(f Field) *Document { | |||
switch f := f.(type) { | |||
case *CompositeField: | |||
d.CompositeFields = append(d.CompositeFields, f) | |||
default: | |||
d.Fields = append(d.Fields, f) | |||
} | |||
return d | |||
} | |||
func (d *Document) GoString() string { | |||
fields := "" | |||
for i, field := range d.Fields { | |||
if i != 0 { | |||
fields += ", " | |||
} | |||
fields += fmt.Sprintf("%#v", field) | |||
} | |||
compositeFields := "" | |||
for i, field := range d.CompositeFields { | |||
if i != 0 { | |||
compositeFields += ", " | |||
} | |||
compositeFields += fmt.Sprintf("%#v", field) | |||
} | |||
return fmt.Sprintf("&document.Document{ID:%s, Fields: %s, CompositeFields: %s}", d.ID, fields, compositeFields) | |||
} | |||
func (d *Document) NumPlainTextBytes() uint64 { | |||
rv := uint64(0) | |||
for _, field := range d.Fields { | |||
rv += field.NumPlainTextBytes() | |||
} | |||
for _, compositeField := range d.CompositeFields { | |||
for _, field := range d.Fields { | |||
if compositeField.includesField(field.Name()) { | |||
rv += field.NumPlainTextBytes() | |||
} | |||
} | |||
} | |||
return rv | |||
} |
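A sketch showing how `AddField` routes composite fields into their own slice, and how `NumPlainTextBytes` counts a plain field once more for every composite field that includes it (using the boolean field defined later in this change):

```go
package main

import (
	"fmt"

	"github.com/blevesearch/bleve/document"
)

func main() {
	doc := document.NewDocument("user-1")
	doc.AddField(document.NewBooleanField("active", nil, true))
	doc.AddField(document.NewCompositeField("_all", true, nil, nil))

	fmt.Println(len(doc.Fields), len(doc.CompositeFields)) // 1 1
	fmt.Println(doc.NumPlainTextBytes())                   // 8: 4 for "true" + 4 again via _all
}
```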
@@ -0,0 +1,39 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package document | |||
import ( | |||
"github.com/blevesearch/bleve/analysis" | |||
) | |||
type Field interface { | |||
// Name returns the path of the field from the root DocumentMapping. | |||
// A root field path is "field", a subdocument field is "parent.field". | |||
Name() string | |||
// ArrayPositions returns the intermediate document and field indices | |||
// required to resolve the field value in the document. For example, if the | |||
// field path is "doc1.doc2.field" where doc1 and doc2 are slices or | |||
// arrays, ArrayPositions returns 2 indices used to resolve "doc2" value in | |||
// "doc1", then "field" in "doc2". | |||
ArrayPositions() []uint64 | |||
Options() IndexingOptions | |||
Analyze() (int, analysis.TokenFrequencies) | |||
Value() []byte | |||
// NumPlainTextBytes should return the number of plain text bytes | |||
// that this field represents - this is a common metric for tracking | |||
// the rate of indexing | |||
NumPlainTextBytes() uint64 | |||
} |
@@ -0,0 +1,107 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package document | |||
import ( | |||
"fmt" | |||
"github.com/blevesearch/bleve/analysis" | |||
) | |||
const DefaultBooleanIndexingOptions = StoreField | IndexField | |||
type BooleanField struct { | |||
name string | |||
arrayPositions []uint64 | |||
options IndexingOptions | |||
value []byte | |||
numPlainTextBytes uint64 | |||
} | |||
func (b *BooleanField) Name() string { | |||
return b.name | |||
} | |||
func (b *BooleanField) ArrayPositions() []uint64 { | |||
return b.arrayPositions | |||
} | |||
func (b *BooleanField) Options() IndexingOptions { | |||
return b.options | |||
} | |||
func (b *BooleanField) Analyze() (int, analysis.TokenFrequencies) { | |||
tokens := make(analysis.TokenStream, 0) | |||
tokens = append(tokens, &analysis.Token{ | |||
Start: 0, | |||
End: len(b.value), | |||
Term: b.value, | |||
Position: 1, | |||
Type: analysis.Boolean, | |||
}) | |||
fieldLength := len(tokens) | |||
tokenFreqs := analysis.TokenFrequency(tokens, b.arrayPositions, b.options.IncludeTermVectors()) | |||
return fieldLength, tokenFreqs | |||
} | |||
func (b *BooleanField) Value() []byte { | |||
return b.value | |||
} | |||
func (b *BooleanField) Boolean() (bool, error) { | |||
if len(b.value) == 1 { | |||
return b.value[0] == 'T', nil | |||
} | |||
return false, fmt.Errorf("boolean field has %d bytes", len(b.value)) | |||
} | |||
func (b *BooleanField) GoString() string { | |||
return fmt.Sprintf("&document.BooleanField{Name:%s, Options: %s, Value: %s}", b.name, b.options, b.value) | |||
} | |||
func (b *BooleanField) NumPlainTextBytes() uint64 { | |||
return b.numPlainTextBytes | |||
} | |||
func NewBooleanFieldFromBytes(name string, arrayPositions []uint64, value []byte) *BooleanField { | |||
return &BooleanField{ | |||
name: name, | |||
arrayPositions: arrayPositions, | |||
value: value, | |||
options: DefaultBooleanIndexingOptions, | |||
numPlainTextBytes: uint64(len(value)), | |||
} | |||
} | |||
func NewBooleanField(name string, arrayPositions []uint64, b bool) *BooleanField { | |||
return NewBooleanFieldWithIndexingOptions(name, arrayPositions, b, DefaultBooleanIndexingOptions) | |||
} | |||
func NewBooleanFieldWithIndexingOptions(name string, arrayPositions []uint64, b bool, options IndexingOptions) *BooleanField { | |||
numPlainTextBytes := 5 // len("false") | |||
v := []byte("F") | |||
if b { | |||
numPlainTextBytes = 4 // len("true") | |||
v = []byte("T") | |||
} | |||
return &BooleanField{ | |||
name: name, | |||
arrayPositions: arrayPositions, | |||
value: v, | |||
options: options, | |||
numPlainTextBytes: uint64(numPlainTextBytes), | |||
} | |||
} |
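A small sketch of the boolean field round trip:

```go
package main

import (
	"fmt"

	"github.com/blevesearch/bleve/document"
)

func main() {
	f := document.NewBooleanField("active", nil, true)

	length, _ := f.Analyze()
	fmt.Println(length, string(f.Value())) // 1 T

	b, err := f.Boolean()
	fmt.Println(b, err) // true <nil>
}
```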
@@ -0,0 +1,99 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package document | |||
import ( | |||
"github.com/blevesearch/bleve/analysis" | |||
) | |||
const DefaultCompositeIndexingOptions = IndexField | |||
type CompositeField struct { | |||
name string | |||
includedFields map[string]bool | |||
excludedFields map[string]bool | |||
defaultInclude bool | |||
options IndexingOptions | |||
totalLength int | |||
compositeFrequencies analysis.TokenFrequencies | |||
} | |||
func NewCompositeField(name string, defaultInclude bool, include []string, exclude []string) *CompositeField { | |||
return NewCompositeFieldWithIndexingOptions(name, defaultInclude, include, exclude, DefaultCompositeIndexingOptions) | |||
} | |||
func NewCompositeFieldWithIndexingOptions(name string, defaultInclude bool, include []string, exclude []string, options IndexingOptions) *CompositeField { | |||
rv := &CompositeField{ | |||
name: name, | |||
options: options, | |||
defaultInclude: defaultInclude, | |||
includedFields: make(map[string]bool, len(include)), | |||
excludedFields: make(map[string]bool, len(exclude)), | |||
compositeFrequencies: make(analysis.TokenFrequencies), | |||
} | |||
for _, i := range include { | |||
rv.includedFields[i] = true | |||
} | |||
for _, e := range exclude { | |||
rv.excludedFields[e] = true | |||
} | |||
return rv | |||
} | |||
func (c *CompositeField) Name() string { | |||
return c.name | |||
} | |||
func (c *CompositeField) ArrayPositions() []uint64 { | |||
return []uint64{} | |||
} | |||
func (c *CompositeField) Options() IndexingOptions { | |||
return c.options | |||
} | |||
func (c *CompositeField) Analyze() (int, analysis.TokenFrequencies) { | |||
return c.totalLength, c.compositeFrequencies | |||
} | |||
func (c *CompositeField) Value() []byte { | |||
return []byte{} | |||
} | |||
func (c *CompositeField) NumPlainTextBytes() uint64 { | |||
return 0 | |||
} | |||
func (c *CompositeField) includesField(field string) bool { | |||
shouldInclude := c.defaultInclude | |||
_, fieldShouldBeIncluded := c.includedFields[field] | |||
if fieldShouldBeIncluded { | |||
shouldInclude = true | |||
} | |||
_, fieldShouldBeExcluded := c.excludedFields[field] | |||
if fieldShouldBeExcluded { | |||
shouldInclude = false | |||
} | |||
return shouldInclude | |||
} | |||
func (c *CompositeField) Compose(field string, length int, freq analysis.TokenFrequencies) { | |||
if c.includesField(field) { | |||
c.totalLength += length | |||
c.compositeFrequencies.MergeAll(field, freq) | |||
} | |||
} |
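A sketch of `Compose` with an excluded field, reusing the `TokenFrequency` helper from earlier in this change:

```go
package main

import (
	"fmt"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/document"
)

func main() {
	// Include everything by default, but exclude "secret".
	all := document.NewCompositeField("_all", true, nil, []string{"secret"})

	freqs := analysis.TokenFrequency(analysis.TokenStream{
		&analysis.Token{Term: []byte("hello"), End: 5, Position: 1},
	}, nil, false)

	all.Compose("body", 1, freqs)   // merged: "body" is included
	all.Compose("secret", 1, freqs) // ignored: "secret" is excluded

	length, merged := all.Analyze()
	fmt.Println(length, merged["hello"].Frequency()) // 1 1
}
```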
@@ -0,0 +1,144 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package document | |||
import ( | |||
"fmt" | |||
"math" | |||
"time" | |||
"github.com/blevesearch/bleve/analysis" | |||
"github.com/blevesearch/bleve/numeric" | |||
) | |||
const DefaultDateTimeIndexingOptions = StoreField | IndexField | |||
const DefaultDateTimePrecisionStep uint = 4 | |||
var MinTimeRepresentable = time.Unix(0, math.MinInt64) | |||
var MaxTimeRepresentable = time.Unix(0, math.MaxInt64) | |||
type DateTimeField struct { | |||
name string | |||
arrayPositions []uint64 | |||
options IndexingOptions | |||
value numeric.PrefixCoded | |||
numPlainTextBytes uint64 | |||
} | |||
func (n *DateTimeField) Name() string { | |||
return n.name | |||
} | |||
func (n *DateTimeField) ArrayPositions() []uint64 { | |||
return n.arrayPositions | |||
} | |||
func (n *DateTimeField) Options() IndexingOptions { | |||
return n.options | |||
} | |||
func (n *DateTimeField) Analyze() (int, analysis.TokenFrequencies) { | |||
tokens := make(analysis.TokenStream, 0) | |||
tokens = append(tokens, &analysis.Token{ | |||
Start: 0, | |||
End: len(n.value), | |||
Term: n.value, | |||
Position: 1, | |||
Type: analysis.DateTime, | |||
}) | |||
original, err := n.value.Int64() | |||
if err == nil { | |||
shift := DefaultDateTimePrecisionStep | |||
for shift < 64 { | |||
shiftEncoded, err := numeric.NewPrefixCodedInt64(original, shift) | |||
if err != nil { | |||
break | |||
} | |||
token := analysis.Token{ | |||
Start: 0, | |||
End: len(shiftEncoded), | |||
Term: shiftEncoded, | |||
Position: 1, | |||
Type: analysis.DateTime, | |||
} | |||
tokens = append(tokens, &token) | |||
shift += DefaultDateTimePrecisionStep | |||
} | |||
} | |||
fieldLength := len(tokens) | |||
tokenFreqs := analysis.TokenFrequency(tokens, n.arrayPositions, n.options.IncludeTermVectors()) | |||
return fieldLength, tokenFreqs | |||
} | |||
func (n *DateTimeField) Value() []byte { | |||
return n.value | |||
} | |||
func (n *DateTimeField) DateTime() (time.Time, error) { | |||
i64, err := n.value.Int64() | |||
if err != nil { | |||
return time.Time{}, err | |||
} | |||
return time.Unix(0, i64).UTC(), nil | |||
} | |||
func (n *DateTimeField) GoString() string { | |||
return fmt.Sprintf("&document.DateField{Name:%s, Options: %s, Value: %s}", n.name, n.options, n.value) | |||
} | |||
func (n *DateTimeField) NumPlainTextBytes() uint64 { | |||
return n.numPlainTextBytes | |||
} | |||
func NewDateTimeFieldFromBytes(name string, arrayPositions []uint64, value []byte) *DateTimeField { | |||
return &DateTimeField{ | |||
name: name, | |||
arrayPositions: arrayPositions, | |||
value: value, | |||
options: DefaultDateTimeIndexingOptions, | |||
numPlainTextBytes: uint64(len(value)), | |||
} | |||
} | |||
func NewDateTimeField(name string, arrayPositions []uint64, dt time.Time) (*DateTimeField, error) { | |||
return NewDateTimeFieldWithIndexingOptions(name, arrayPositions, dt, DefaultDateTimeIndexingOptions) | |||
} | |||
func NewDateTimeFieldWithIndexingOptions(name string, arrayPositions []uint64, dt time.Time, options IndexingOptions) (*DateTimeField, error) { | |||
if canRepresent(dt) { | |||
dtInt64 := dt.UnixNano() | |||
prefixCoded := numeric.MustNewPrefixCodedInt64(dtInt64, 0) | |||
return &DateTimeField{ | |||
name: name, | |||
arrayPositions: arrayPositions, | |||
value: prefixCoded, | |||
options: options, | |||
// not correct, just a placeholder until we revisit how fields are | |||
// represented and can fix this better | |||
numPlainTextBytes: uint64(8), | |||
}, nil | |||
} | |||
return nil, fmt.Errorf("cannot represent %s in this type", dt) | |||
} | |||
func canRepresent(dt time.Time) bool { | |||
if dt.Before(MinTimeRepresentable) || dt.After(MaxTimeRepresentable) { | |||
return false | |||
} | |||
return true | |||
} |
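The precision-step trick above is what makes date-range queries cheap: besides the full-precision term, Analyze emits one lower-precision term per 4-bit shift, so a range query can match whole buckets instead of enumerating every timestamp. A minimal sketch of the resulting token counts, assuming the vendored packages are importable in the usual way:

package main

import (
	"fmt"
	"time"

	"github.com/blevesearch/bleve/document"
)

func main() {
	// Values outside MinTimeRepresentable..MaxTimeRepresentable are rejected.
	f, err := document.NewDateTimeField("created_at", nil, time.Now())
	if err != nil {
		panic(err)
	}
	// One full-precision token plus shifts 4, 8, ..., 60: 16 tokens total.
	length, freqs := f.Analyze()
	fmt.Println(length, len(freqs)) // 16 16
}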
@@ -0,0 +1,130 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package document | |||
import ( | |||
"fmt" | |||
"github.com/blevesearch/bleve/analysis" | |||
"github.com/blevesearch/bleve/numeric" | |||
) | |||
const DefaultNumericIndexingOptions = StoreField | IndexField | |||
const DefaultPrecisionStep uint = 4 | |||
type NumericField struct { | |||
name string | |||
arrayPositions []uint64 | |||
options IndexingOptions | |||
value numeric.PrefixCoded | |||
numPlainTextBytes uint64 | |||
} | |||
func (n *NumericField) Name() string { | |||
return n.name | |||
} | |||
func (n *NumericField) ArrayPositions() []uint64 { | |||
return n.arrayPositions | |||
} | |||
func (n *NumericField) Options() IndexingOptions { | |||
return n.options | |||
} | |||
func (n *NumericField) Analyze() (int, analysis.TokenFrequencies) { | |||
tokens := make(analysis.TokenStream, 0) | |||
tokens = append(tokens, &analysis.Token{ | |||
Start: 0, | |||
End: len(n.value), | |||
Term: n.value, | |||
Position: 1, | |||
Type: analysis.Numeric, | |||
}) | |||
original, err := n.value.Int64() | |||
if err == nil { | |||
shift := DefaultPrecisionStep | |||
for shift < 64 { | |||
shiftEncoded, err := numeric.NewPrefixCodedInt64(original, shift) | |||
if err != nil { | |||
break | |||
} | |||
token := analysis.Token{ | |||
Start: 0, | |||
End: len(shiftEncoded), | |||
Term: shiftEncoded, | |||
Position: 1, | |||
Type: analysis.Numeric, | |||
} | |||
tokens = append(tokens, &token) | |||
shift += DefaultPrecisionStep | |||
} | |||
} | |||
fieldLength := len(tokens) | |||
tokenFreqs := analysis.TokenFrequency(tokens, n.arrayPositions, n.options.IncludeTermVectors()) | |||
return fieldLength, tokenFreqs | |||
} | |||
func (n *NumericField) Value() []byte { | |||
return n.value | |||
} | |||
func (n *NumericField) Number() (float64, error) { | |||
i64, err := n.value.Int64() | |||
if err != nil { | |||
return 0.0, err | |||
} | |||
return numeric.Int64ToFloat64(i64), nil | |||
} | |||
func (n *NumericField) GoString() string { | |||
return fmt.Sprintf("&document.NumericField{Name:%s, Options: %s, Value: %s}", n.name, n.options, n.value) | |||
} | |||
func (n *NumericField) NumPlainTextBytes() uint64 { | |||
return n.numPlainTextBytes | |||
} | |||
func NewNumericFieldFromBytes(name string, arrayPositions []uint64, value []byte) *NumericField { | |||
return &NumericField{ | |||
name: name, | |||
arrayPositions: arrayPositions, | |||
value: value, | |||
options: DefaultNumericIndexingOptions, | |||
numPlainTextBytes: uint64(len(value)), | |||
} | |||
} | |||
func NewNumericField(name string, arrayPositions []uint64, number float64) *NumericField { | |||
return NewNumericFieldWithIndexingOptions(name, arrayPositions, number, DefaultNumericIndexingOptions) | |||
} | |||
func NewNumericFieldWithIndexingOptions(name string, arrayPositions []uint64, number float64, options IndexingOptions) *NumericField { | |||
numberInt64 := numeric.Float64ToInt64(number) | |||
prefixCoded := numeric.MustNewPrefixCodedInt64(numberInt64, 0) | |||
return &NumericField{ | |||
name: name, | |||
arrayPositions: arrayPositions, | |||
value: prefixCoded, | |||
options: options, | |||
// not correct, just a placeholder until we revisit how fields are | |||
// represented and can fix this better | |||
numPlainTextBytes: uint64(8), | |||
} | |||
} |
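NumericField runs float64 values through numeric.Float64ToInt64, a sortable int64 encoding, so the same prefix-coded range machinery used for dates works for numbers. A small sketch of the round trip (same import assumptions as above):

package main

import (
	"fmt"

	"github.com/blevesearch/bleve/document"
)

func main() {
	f := document.NewNumericField("price", nil, 19.99)
	// Number() decodes the prefix-coded int64 back to the original float64.
	n, err := f.Number()
	if err != nil {
		panic(err)
	}
	fmt.Println(n) // 19.99
}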
@@ -0,0 +1,119 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package document | |||
import ( | |||
"fmt" | |||
"github.com/blevesearch/bleve/analysis" | |||
) | |||
const DefaultTextIndexingOptions = IndexField | |||
type TextField struct { | |||
name string | |||
arrayPositions []uint64 | |||
options IndexingOptions | |||
analyzer *analysis.Analyzer | |||
value []byte | |||
numPlainTextBytes uint64 | |||
} | |||
func (t *TextField) Name() string { | |||
return t.name | |||
} | |||
func (t *TextField) ArrayPositions() []uint64 { | |||
return t.arrayPositions | |||
} | |||
func (t *TextField) Options() IndexingOptions { | |||
return t.options | |||
} | |||
func (t *TextField) Analyze() (int, analysis.TokenFrequencies) { | |||
var tokens analysis.TokenStream | |||
if t.analyzer != nil { | |||
bytesToAnalyze := t.Value() | |||
if t.options.IsStored() { | |||
// need to copy | |||
bytesCopied := make([]byte, len(bytesToAnalyze)) | |||
copy(bytesCopied, bytesToAnalyze) | |||
bytesToAnalyze = bytesCopied | |||
} | |||
tokens = t.analyzer.Analyze(bytesToAnalyze) | |||
} else { | |||
tokens = analysis.TokenStream{ | |||
&analysis.Token{ | |||
Start: 0, | |||
End: len(t.value), | |||
Term: t.value, | |||
Position: 1, | |||
Type: analysis.AlphaNumeric, | |||
}, | |||
} | |||
} | |||
fieldLength := len(tokens) // number of tokens in this doc field | |||
tokenFreqs := analysis.TokenFrequency(tokens, t.arrayPositions, t.options.IncludeTermVectors()) | |||
return fieldLength, tokenFreqs | |||
} | |||
func (t *TextField) Value() []byte { | |||
return t.value | |||
} | |||
func (t *TextField) GoString() string { | |||
return fmt.Sprintf("&document.TextField{Name:%s, Options: %s, Analyzer: %v, Value: %s, ArrayPositions: %v}", t.name, t.options, t.analyzer, t.value, t.arrayPositions) | |||
} | |||
func (t *TextField) NumPlainTextBytes() uint64 { | |||
return t.numPlainTextBytes | |||
} | |||
func NewTextField(name string, arrayPositions []uint64, value []byte) *TextField { | |||
return NewTextFieldWithIndexingOptions(name, arrayPositions, value, DefaultTextIndexingOptions) | |||
} | |||
func NewTextFieldWithIndexingOptions(name string, arrayPositions []uint64, value []byte, options IndexingOptions) *TextField { | |||
return &TextField{ | |||
name: name, | |||
arrayPositions: arrayPositions, | |||
options: options, | |||
value: value, | |||
numPlainTextBytes: uint64(len(value)), | |||
} | |||
} | |||
func NewTextFieldWithAnalyzer(name string, arrayPositions []uint64, value []byte, analyzer *analysis.Analyzer) *TextField { | |||
return &TextField{ | |||
name: name, | |||
arrayPositions: arrayPositions, | |||
options: DefaultTextIndexingOptions, | |||
analyzer: analyzer, | |||
value: value, | |||
numPlainTextBytes: uint64(len(value)), | |||
} | |||
} | |||
func NewTextFieldCustom(name string, arrayPositions []uint64, value []byte, options IndexingOptions, analyzer *analysis.Analyzer) *TextField { | |||
return &TextField{ | |||
name: name, | |||
arrayPositions: arrayPositions, | |||
options: options, | |||
analyzer: analyzer, | |||
value: value, | |||
numPlainTextBytes: uint64(len(value)), | |||
} | |||
} |
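When no analyzer is attached, Analyze falls back to a single token spanning the raw value, which is how exact-match keyword fields behave. A sketch of the default constructor's behavior:

package main

import (
	"fmt"

	"github.com/blevesearch/bleve/document"
)

func main() {
	// Default options are IndexField only; with no analyzer the whole
	// value becomes one AlphaNumeric token.
	f := document.NewTextField("title", nil, []byte("hello world"))
	length, freqs := f.Analyze()
	fmt.Println(length, len(freqs))     // 1 1
	fmt.Println(f.Options().IsStored()) // false
}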
@@ -0,0 +1,55 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package document | |||
type IndexingOptions int | |||
const ( | |||
IndexField IndexingOptions = 1 << iota | |||
StoreField | |||
IncludeTermVectors | |||
) | |||
func (o IndexingOptions) IsIndexed() bool { | |||
return o&IndexField != 0 | |||
} | |||
func (o IndexingOptions) IsStored() bool { | |||
return o&StoreField != 0 | |||
} | |||
func (o IndexingOptions) IncludeTermVectors() bool { | |||
return o&IncludeTermVectors != 0 | |||
} | |||
func (o IndexingOptions) String() string { | |||
rv := "" | |||
if o.IsIndexed() { | |||
rv += "INDEXED" | |||
} | |||
if o.IsStored() { | |||
if rv != "" { | |||
rv += ", " | |||
} | |||
rv += "STORE" | |||
} | |||
if o.IncludeTermVectors() { | |||
if rv != "" { | |||
rv += ", " | |||
} | |||
rv += "TV" | |||
} | |||
return rv | |||
} |
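IndexingOptions is a plain bit set, so options compose with |. A quick demonstration of the flags and the String() rendering:

package main

import (
	"fmt"

	"github.com/blevesearch/bleve/document"
)

func main() {
	opts := document.IndexField | document.StoreField | document.IncludeTermVectors
	fmt.Println(opts.IsIndexed(), opts.IsStored(), opts.IncludeTermVectors()) // true true true
	fmt.Println(opts) // INDEXED, STORE, TV
}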
@@ -0,0 +1,52 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package bleve | |||
// Constant Error values which can be compared to determine the type of error | |||
const ( | |||
ErrorIndexPathExists Error = iota | |||
ErrorIndexPathDoesNotExist | |||
ErrorIndexMetaMissing | |||
ErrorIndexMetaCorrupt | |||
ErrorUnknownStorageType | |||
ErrorIndexClosed | |||
ErrorAliasMulti | |||
ErrorAliasEmpty | |||
ErrorUnknownIndexType | |||
ErrorEmptyID | |||
ErrorIndexReadInconsistency | |||
) | |||
// Error represents a more strongly typed bleve error for detecting | |||
// and handling specific types of errors. | |||
type Error int | |||
func (e Error) Error() string { | |||
return errorMessages[e] | |||
} | |||
var errorMessages = map[Error]string{ | |||
ErrorIndexPathExists: "cannot create new index, path already exists", | |||
ErrorIndexPathDoesNotExist: "cannot open index, path does not exist", | |||
ErrorIndexMetaMissing: "cannot open index, metadata missing", | |||
ErrorIndexMetaCorrupt: "cannot open index, metadata corrupt", | |||
ErrorUnknownStorageType: "unknown storage type", | |||
ErrorIndexClosed: "index is closed", | |||
ErrorAliasMulti: "cannot perform single index operation on multiple index alias", | |||
ErrorAliasEmpty: "cannot perform operation on empty alias", | |||
ErrorUnknownIndexType: "unknown index type", | |||
ErrorEmptyID: "document ID cannot be empty", | |||
ErrorIndexReadInconsistency: "index read inconsistency detected", | |||
} |
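Because these are comparable constants rather than ad-hoc error strings, callers can branch on them directly. The usual open-or-create idiom, sketched with the top-level bleve API:

package main

import "github.com/blevesearch/bleve"

func openOrCreate(path string) (bleve.Index, error) {
	idx, err := bleve.Open(path)
	if err == bleve.ErrorIndexPathDoesNotExist {
		// first run: create a fresh index with a default mapping
		idx, err = bleve.New(path, bleve.NewIndexMapping())
	}
	return idx, err
}

func main() {
	idx, err := openOrCreate("/tmp/example.bleve")
	if err != nil {
		panic(err)
	}
	defer idx.Close()
}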
@@ -0,0 +1,243 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package bleve | |||
import ( | |||
"github.com/blevesearch/bleve/document" | |||
"github.com/blevesearch/bleve/index" | |||
"github.com/blevesearch/bleve/index/store" | |||
"github.com/blevesearch/bleve/mapping" | |||
"golang.org/x/net/context" | |||
) | |||
// A Batch groups together multiple Index and Delete | |||
// operations you would like performed at the same | |||
// time. The Batch structure is NOT thread-safe. | |||
// You should only perform operations on a batch | |||
// from a single thread at a time. Once batch | |||
// execution has started, you may not modify it. | |||
type Batch struct { | |||
index Index | |||
internal *index.Batch | |||
} | |||
// Index adds the specified index operation to the | |||
// batch. NOTE: the bleve Index is not updated | |||
// until the batch is executed. | |||
func (b *Batch) Index(id string, data interface{}) error { | |||
if id == "" { | |||
return ErrorEmptyID | |||
} | |||
doc := document.NewDocument(id) | |||
err := b.index.Mapping().MapDocument(doc, data) | |||
if err != nil { | |||
return err | |||
} | |||
b.internal.Update(doc) | |||
return nil | |||
} | |||
// Delete adds the specified delete operation to the | |||
// batch. NOTE: the bleve Index is not updated until | |||
// the batch is executed. | |||
func (b *Batch) Delete(id string) { | |||
if id != "" { | |||
b.internal.Delete(id) | |||
} | |||
} | |||
// SetInternal adds the specified set internal | |||
// operation to the batch. NOTE: the bleve Index is | |||
// not updated until the batch is executed. | |||
func (b *Batch) SetInternal(key, val []byte) { | |||
b.internal.SetInternal(key, val) | |||
} | |||
// DeleteInternal adds the specified delete internal | |||
// operation to the batch. NOTE: the bleve Index is | |||
// not updated until the batch is executed. | |||
func (b *Batch) DeleteInternal(key []byte) { | |||
b.internal.DeleteInternal(key) | |||
} | |||
// Size returns the total number of operations inside the batch | |||
// including normal index operations and internal operations. | |||
func (b *Batch) Size() int { | |||
return len(b.internal.IndexOps) + len(b.internal.InternalOps) | |||
} | |||
// String prints a user friendly string representation of what | |||
// is inside this batch. | |||
func (b *Batch) String() string { | |||
return b.internal.String() | |||
} | |||
// Reset returns a Batch to the empty state so that it can | |||
// be re-used in the future. | |||
func (b *Batch) Reset() { | |||
b.internal.Reset() | |||
} | |||
// An Index implements all the indexing and searching | |||
// capabilities of bleve. An Index can be created | |||
// using the New() and Open() methods. | |||
// | |||
// Index() takes an input value, deduces a DocumentMapping for its type, | |||
// assigns string paths to its fields or values, then applies field mappings on | |||
// them. | |||
// | |||
// The DocumentMapping used to index a value is deduced by the following rules: | |||
// 1) If value implements Classifier interface, resolve the mapping from Type(). | |||
// 2) If value has a string field or value at IndexMapping.TypeField | |||
// (defaulting to "_type"), use it to resolve the mapping. Field addressing | |||
// is described below. | |||
// 3) If IndexMapping.DefaultType is registered, return it. | |||
// 4) Return IndexMapping.DefaultMapping. | |||
// | |||
// Each field or nested field of the value is identified by a string path, then | |||
// mapped to one or several FieldMappings which extract the result for analysis. | |||
// | |||
// Struct value fields are identified by their "json:" tag, or by their name. | |||
// Nested fields are identified by prefixing with their parent identifier, | |||
// separated by a dot. | |||
// | |||
// Map value entries are identified by their string key. Entries not indexed | |||
// by strings are ignored. Entry values are identified recursively like struct | |||
// fields. | |||
// | |||
// Slice and array values are identified by their field name. Their elements | |||
// are processed sequentially with the same FieldMapping. | |||
// | |||
// String, float64 and time.Time values are identified by their field name. | |||
// Other types are ignored. | |||
// | |||
// Each value identifier is decomposed into its parts, which recursively address | |||
// SubDocumentMappings in the tree starting at the root DocumentMapping. If a | |||
// mapping is found, all its FieldMappings are applied to the value. If no | |||
// mapping is found and the root DocumentMapping is dynamic, default mappings | |||
// are used based on value type and IndexMapping default configurations. | |||
// | |||
// Finally, mapped values are analyzed, indexed or stored. See | |||
// FieldMapping.Analyzer to know how an analyzer is resolved for a given field. | |||
// | |||
// Examples: | |||
// | |||
// type Date struct { | |||
// Day string `json:"day"` | |||
// Month string | |||
// Year string | |||
// } | |||
// | |||
// type Person struct { | |||
// FirstName string `json:"first_name"` | |||
// LastName string | |||
// BirthDate Date `json:"birth_date"` | |||
// } | |||
// | |||
// A Person value FirstName is mapped by the SubDocumentMapping at | |||
// "first_name". Its LastName is mapped by the one at "LastName". The day of | |||
// BirthDate is mapped to the SubDocumentMapping "day" of the root | |||
// SubDocumentMapping "birth_date". It will appear as the "birth_date.day" | |||
// field in the index. The month is mapped to "birth_date.Month". | |||
type Index interface { | |||
// Index analyzes, indexes or stores mapped data fields. Supplied | |||
// identifier is bound to analyzed data and will be retrieved by search | |||
// requests. See Index interface documentation for details about mapping | |||
// rules. | |||
Index(id string, data interface{}) error | |||
Delete(id string) error | |||
NewBatch() *Batch | |||
Batch(b *Batch) error | |||
// Document returns specified document or nil if the document is not | |||
// indexed or stored. | |||
Document(id string) (*document.Document, error) | |||
// DocCount returns the number of documents in the index. | |||
DocCount() (uint64, error) | |||
Search(req *SearchRequest) (*SearchResult, error) | |||
SearchInContext(ctx context.Context, req *SearchRequest) (*SearchResult, error) | |||
Fields() ([]string, error) | |||
FieldDict(field string) (index.FieldDict, error) | |||
FieldDictRange(field string, startTerm []byte, endTerm []byte) (index.FieldDict, error) | |||
FieldDictPrefix(field string, termPrefix []byte) (index.FieldDict, error) | |||
Close() error | |||
Mapping() mapping.IndexMapping | |||
Stats() *IndexStat | |||
StatsMap() map[string]interface{} | |||
GetInternal(key []byte) ([]byte, error) | |||
SetInternal(key, val []byte) error | |||
DeleteInternal(key []byte) error | |||
// Name returns the name of the index (by default this is the path) | |||
Name() string | |||
// SetName lets you assign your own logical name to this index | |||
SetName(string) | |||
// Advanced returns the indexer and data store, exposing lower level | |||
// methods to enumerate records and access data. | |||
Advanced() (index.Index, store.KVStore, error) | |||
} | |||
// New creates an index at the specified path, which must not already exist. | |||
// The provided mapping will be used for all | |||
// Index/Search operations. | |||
func New(path string, mapping mapping.IndexMapping) (Index, error) { | |||
return newIndexUsing(path, mapping, Config.DefaultIndexType, Config.DefaultKVStore, nil) | |||
} | |||
// NewMemOnly creates a memory-only index. | |||
// The contents of the index is NOT persisted, | |||
// and will be lost once closed. | |||
// The provided mapping will be used for all | |||
// Index/Search operations. | |||
func NewMemOnly(mapping mapping.IndexMapping) (Index, error) { | |||
return newIndexUsing("", mapping, Config.DefaultIndexType, Config.DefaultMemKVStore, nil) | |||
} | |||
// NewUsing creates index at the specified path, | |||
// which must not already exist. | |||
// The provided mapping will be used for all | |||
// Index/Search operations. | |||
// The specified index type will be used. | |||
// The specified kvstore implementation will be used | |||
// and the provided kvconfig will be passed to its | |||
// constructor. Note that currently the values of kvconfig must | |||
// be able to be marshaled and unmarshaled using the encoding/json library (used | |||
// when reading/writing the index metadata file). | |||
func NewUsing(path string, mapping mapping.IndexMapping, indexType string, kvstore string, kvconfig map[string]interface{}) (Index, error) { | |||
return newIndexUsing(path, mapping, indexType, kvstore, kvconfig) | |||
} | |||
// Open opens an index at the specified path, which must already exist. | |||
// The mapping used when it was created will be used for all Index/Search operations. | |||
func Open(path string) (Index, error) { | |||
return openIndexUsing(path, nil) | |||
} | |||
// OpenUsing opens index at the specified path, must exist. | |||
// The mapping used when it was created will be used for all Index/Search operations. | |||
// The provided runtimeConfig can override settings | |||
// persisted when the kvstore was created. | |||
func OpenUsing(path string, runtimeConfig map[string]interface{}) (Index, error) { | |||
return openIndexUsing(path, runtimeConfig) | |||
} |
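To make the mapping rules above concrete, here is a sketch that indexes the Person example from the doc comment into a memory-only index and searches the flattened "first_name" field (names and values are illustrative):

package main

import (
	"fmt"

	"github.com/blevesearch/bleve"
)

type Date struct {
	Day   string `json:"day"`
	Month string
	Year  string
}

type Person struct {
	FirstName string `json:"first_name"`
	LastName  string
	BirthDate Date `json:"birth_date"`
}

func main() {
	idx, err := bleve.NewMemOnly(bleve.NewIndexMapping())
	if err != nil {
		panic(err)
	}
	defer idx.Close()

	p := Person{FirstName: "Ada", LastName: "Lovelace",
		BirthDate: Date{Day: "10", Month: "December", Year: "1815"}}
	if err := idx.Index("person1", p); err != nil {
		panic(err)
	}

	// Fields were flattened per the rules above; the day would be
	// addressable as "birth_date.day".
	q := bleve.NewMatchQuery("Ada")
	q.SetField("first_name")
	res, err := idx.Search(bleve.NewSearchRequest(q))
	if err != nil {
		panic(err)
	}
	fmt.Println(res.Total) // 1
}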
@@ -0,0 +1,83 @@ | |||
// Copyright (c) 2015 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package index | |||
import "github.com/blevesearch/bleve/document" | |||
type IndexRow interface { | |||
KeySize() int | |||
KeyTo([]byte) (int, error) | |||
Key() []byte | |||
ValueSize() int | |||
ValueTo([]byte) (int, error) | |||
Value() []byte | |||
} | |||
type AnalysisResult struct { | |||
DocID string | |||
Rows []IndexRow | |||
} | |||
type AnalysisWork struct { | |||
i Index | |||
d *document.Document | |||
rc chan *AnalysisResult | |||
} | |||
func NewAnalysisWork(i Index, d *document.Document, rc chan *AnalysisResult) *AnalysisWork { | |||
return &AnalysisWork{ | |||
i: i, | |||
d: d, | |||
rc: rc, | |||
} | |||
} | |||
type AnalysisQueue struct { | |||
queue chan *AnalysisWork | |||
done chan struct{} | |||
} | |||
func (q *AnalysisQueue) Queue(work *AnalysisWork) { | |||
q.queue <- work | |||
} | |||
func (q *AnalysisQueue) Close() { | |||
close(q.done) | |||
} | |||
func NewAnalysisQueue(numWorkers int) *AnalysisQueue { | |||
rv := AnalysisQueue{ | |||
queue: make(chan *AnalysisWork), | |||
done: make(chan struct{}), | |||
} | |||
for i := 0; i < numWorkers; i++ { | |||
go AnalysisWorker(rv) | |||
} | |||
return &rv | |||
} | |||
func AnalysisWorker(q AnalysisQueue) { | |||
// read work off the queue | |||
for { | |||
select { | |||
case <-q.done: | |||
return | |||
case w := <-q.queue: | |||
r := w.i.Analyze(w.d) | |||
w.rc <- r | |||
} | |||
} | |||
} |
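The queue fans analysis out to numWorkers goroutines; each worker calls Index.Analyze and sends the rows back on the per-work channel. A minimal wiring sketch, where idx (any index.Index implementation) and doc are supplied by the caller:

package sketch

import (
	"github.com/blevesearch/bleve/document"
	"github.com/blevesearch/bleve/index"
)

// analyzeOne shows the queue wiring; idx may be any index.Index
// implementation and doc a prepared document (both hypothetical here).
func analyzeOne(idx index.Index, doc *document.Document) *index.AnalysisResult {
	queue := index.NewAnalysisQueue(4) // four worker goroutines
	defer queue.Close()

	rc := make(chan *index.AnalysisResult, 1)
	queue.Queue(index.NewAnalysisWork(idx, doc, rc))
	return <-rc // a worker calls idx.Analyze(doc) and replies here
}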
@@ -0,0 +1,88 @@ | |||
// Copyright (c) 2015 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package index | |||
import ( | |||
"sync" | |||
) | |||
type FieldCache struct { | |||
fieldIndexes map[string]uint16 | |||
indexFields []string | |||
lastFieldIndex int | |||
mutex sync.RWMutex | |||
} | |||
func NewFieldCache() *FieldCache { | |||
return &FieldCache{ | |||
fieldIndexes: make(map[string]uint16), | |||
lastFieldIndex: -1, | |||
} | |||
} | |||
func (f *FieldCache) AddExisting(field string, index uint16) { | |||
f.mutex.Lock() | |||
f.addLOCKED(field, index) | |||
f.mutex.Unlock() | |||
} | |||
func (f *FieldCache) addLOCKED(field string, index uint16) uint16 { | |||
f.fieldIndexes[field] = index | |||
if len(f.indexFields) < int(index)+1 { | |||
prevIndexFields := f.indexFields | |||
f.indexFields = make([]string, int(index)+16) | |||
copy(f.indexFields, prevIndexFields) | |||
} | |||
f.indexFields[int(index)] = field | |||
if int(index) > f.lastFieldIndex { | |||
f.lastFieldIndex = int(index) | |||
} | |||
return index | |||
} | |||
// FieldNamed returns the index of the field, and whether or not it existed | |||
// before this call. If createIfMissing is true, a new field index is assigned, | |||
// but the second return value will still be false. | |||
func (f *FieldCache) FieldNamed(field string, createIfMissing bool) (uint16, bool) { | |||
f.mutex.RLock() | |||
if index, ok := f.fieldIndexes[field]; ok { | |||
f.mutex.RUnlock() | |||
return index, true | |||
} else if !createIfMissing { | |||
f.mutex.RUnlock() | |||
return 0, false | |||
} | |||
// trade read lock for write lock | |||
f.mutex.RUnlock() | |||
f.mutex.Lock() | |||
// need to check again with write lock | |||
if index, ok := f.fieldIndexes[field]; ok { | |||
f.mutex.Unlock() | |||
return index, true | |||
} | |||
// assign next field id | |||
index := f.addLOCKED(field, uint16(f.lastFieldIndex+1)) | |||
f.mutex.Unlock() | |||
return index, false | |||
} | |||
func (f *FieldCache) FieldIndexed(index uint16) (field string) { | |||
f.mutex.RLock() | |||
if int(index) < len(f.indexFields) { | |||
field = f.indexFields[int(index)] | |||
} | |||
f.mutex.RUnlock() | |||
return field | |||
} |
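FieldNamed uses the classic read-lock-then-upgrade pattern: the common hit path takes only an RLock, and the write lock re-checks the map before assigning the next id. Its semantics in a small runnable sketch:

package main

import (
	"fmt"

	"github.com/blevesearch/bleve/index"
)

func main() {
	cache := index.NewFieldCache()

	id, existed := cache.FieldNamed("title", true)
	fmt.Println(id, existed) // 0 false (freshly assigned)

	id, existed = cache.FieldNamed("title", false)
	fmt.Println(id, existed) // 0 true

	fmt.Println(cache.FieldIndexed(0)) // title
}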
@@ -0,0 +1,239 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package index | |||
import ( | |||
"bytes" | |||
"encoding/json" | |||
"fmt" | |||
"github.com/blevesearch/bleve/document" | |||
"github.com/blevesearch/bleve/index/store" | |||
) | |||
var ErrorUnknownStorageType = fmt.Errorf("unknown storage type") | |||
type Index interface { | |||
Open() error | |||
Close() error | |||
Update(doc *document.Document) error | |||
Delete(id string) error | |||
Batch(batch *Batch) error | |||
SetInternal(key, val []byte) error | |||
DeleteInternal(key []byte) error | |||
// Reader returns a low-level accessor on the index data. Close it to | |||
// release associated resources. | |||
Reader() (IndexReader, error) | |||
Stats() json.Marshaler | |||
StatsMap() map[string]interface{} | |||
Analyze(d *document.Document) *AnalysisResult | |||
Advanced() (store.KVStore, error) | |||
} | |||
type IndexReader interface { | |||
TermFieldReader(term []byte, field string, includeFreq, includeNorm, includeTermVectors bool) (TermFieldReader, error) | |||
// DocIDReaderAll returns an iterator over all doc ids. | |||
// The caller must close returned instance to release associated resources. | |||
DocIDReaderAll() (DocIDReader, error) | |||
DocIDReaderOnly(ids []string) (DocIDReader, error) | |||
FieldDict(field string) (FieldDict, error) | |||
// FieldDictRange is currently defined to include the start and end terms | |||
FieldDictRange(field string, startTerm []byte, endTerm []byte) (FieldDict, error) | |||
FieldDictPrefix(field string, termPrefix []byte) (FieldDict, error) | |||
Document(id string) (*document.Document, error) | |||
DocumentFieldTerms(id IndexInternalID, fields []string) (FieldTerms, error) | |||
Fields() ([]string, error) | |||
GetInternal(key []byte) ([]byte, error) | |||
DocCount() (uint64, error) | |||
ExternalID(id IndexInternalID) (string, error) | |||
InternalID(id string) (IndexInternalID, error) | |||
DumpAll() chan interface{} | |||
DumpDoc(id string) chan interface{} | |||
DumpFields() chan interface{} | |||
Close() error | |||
} | |||
// FieldTerms contains the terms used by a document, keyed by field | |||
type FieldTerms map[string][]string | |||
// FieldsNotYetCached returns a list of fields not yet cached out of a larger list of fields | |||
func (f FieldTerms) FieldsNotYetCached(fields []string) []string { | |||
rv := make([]string, 0, len(fields)) | |||
for _, field := range fields { | |||
if _, ok := f[field]; !ok { | |||
rv = append(rv, field) | |||
} | |||
} | |||
return rv | |||
} | |||
// Merge combines two FieldTerms. | |||
// It assumes the term lists are complete (thus they do not need to be merged); | |||
// field terms from the other list always replace the ones in the receiver. | |||
func (f FieldTerms) Merge(other FieldTerms) { | |||
for field, terms := range other { | |||
f[field] = terms | |||
} | |||
} | |||
type TermFieldVector struct { | |||
Field string | |||
ArrayPositions []uint64 | |||
Pos uint64 | |||
Start uint64 | |||
End uint64 | |||
} | |||
// IndexInternalID is an opaque document identifier internal to the index impl | |||
type IndexInternalID []byte | |||
func (id IndexInternalID) Equals(other IndexInternalID) bool { | |||
return id.Compare(other) == 0 | |||
} | |||
func (id IndexInternalID) Compare(other IndexInternalID) int { | |||
return bytes.Compare(id, other) | |||
} | |||
type TermFieldDoc struct { | |||
Term string | |||
ID IndexInternalID | |||
Freq uint64 | |||
Norm float64 | |||
Vectors []*TermFieldVector | |||
} | |||
// Reset allows an already allocated TermFieldDoc to be reused | |||
func (tfd *TermFieldDoc) Reset() *TermFieldDoc { | |||
// remember the []byte used for the ID | |||
id := tfd.ID | |||
// idiom to copy over from empty TermFieldDoc (0 allocations) | |||
*tfd = TermFieldDoc{} | |||
// reuse the []byte already allocated (and reset len to 0) | |||
tfd.ID = id[:0] | |||
return tfd | |||
} | |||
// TermFieldReader is the interface exposing the enumeration of documents | |||
// containing a given term in a given field. Documents are returned in byte | |||
// lexicographic order over their identifiers. | |||
type TermFieldReader interface { | |||
// Next returns the next document containing the term in this field, or nil | |||
// when it reaches the end of the enumeration. The preAlloced TermFieldDoc | |||
// is optional, and when non-nil, will be used instead of allocating memory. | |||
Next(preAlloced *TermFieldDoc) (*TermFieldDoc, error) | |||
// Advance resets the enumeration at specified document or its immediate | |||
// follower. | |||
Advance(ID IndexInternalID, preAlloced *TermFieldDoc) (*TermFieldDoc, error) | |||
// Count returns the number of documents that contain the term in this field. | |||
Count() uint64 | |||
Close() error | |||
} | |||
type DictEntry struct { | |||
Term string | |||
Count uint64 | |||
} | |||
type FieldDict interface { | |||
Next() (*DictEntry, error) | |||
Close() error | |||
} | |||
// DocIDReader is the interface exposing enumeration of documents identifiers. | |||
// Close the reader to release associated resources. | |||
type DocIDReader interface { | |||
// Next returns the next document internal identifier in the natural | |||
// index order, nil when the end of the sequence is reached. | |||
Next() (IndexInternalID, error) | |||
// Advance resets the iteration to the first internal identifier greater than | |||
// or equal to ID. If ID is smaller than the start of the range, the iteration | |||
// will start there instead. If ID is greater than or equal to the end of | |||
// the range, Next() call will return io.EOF. | |||
Advance(ID IndexInternalID) (IndexInternalID, error) | |||
Close() error | |||
} | |||
type Batch struct { | |||
IndexOps map[string]*document.Document | |||
InternalOps map[string][]byte | |||
} | |||
func NewBatch() *Batch { | |||
return &Batch{ | |||
IndexOps: make(map[string]*document.Document), | |||
InternalOps: make(map[string][]byte), | |||
} | |||
} | |||
func (b *Batch) Update(doc *document.Document) { | |||
b.IndexOps[doc.ID] = doc | |||
} | |||
func (b *Batch) Delete(id string) { | |||
b.IndexOps[id] = nil | |||
} | |||
func (b *Batch) SetInternal(key, val []byte) { | |||
b.InternalOps[string(key)] = val | |||
} | |||
func (b *Batch) DeleteInternal(key []byte) { | |||
b.InternalOps[string(key)] = nil | |||
} | |||
func (b *Batch) String() string { | |||
rv := fmt.Sprintf("Batch (%d ops, %d internal ops)\n", len(b.IndexOps), len(b.InternalOps)) | |||
for k, v := range b.IndexOps { | |||
if v != nil { | |||
rv += fmt.Sprintf("\tINDEX - '%s'\n", k) | |||
} else { | |||
rv += fmt.Sprintf("\tDELETE - '%s'\n", k) | |||
} | |||
} | |||
for k, v := range b.InternalOps { | |||
if v != nil { | |||
rv += fmt.Sprintf("\tSET INTERNAL - '%s'\n", k) | |||
} else { | |||
rv += fmt.Sprintf("\tDELETE INTERNAL - '%s'\n", k) | |||
} | |||
} | |||
return rv | |||
} | |||
func (b *Batch) Reset() { | |||
b.IndexOps = make(map[string]*document.Document) | |||
b.InternalOps = make(map[string][]byte) | |||
} |
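Batch is last-write-wins per document id: Update and Delete both write into the same IndexOps map, with nil marking a delete. A short sketch of building and inspecting one:

package main

import (
	"fmt"

	"github.com/blevesearch/bleve/document"
	"github.com/blevesearch/bleve/index"
)

func main() {
	b := index.NewBatch()
	b.Update(document.NewDocument("doc1"))
	b.Delete("doc2")
	b.SetInternal([]byte("meta"), []byte("v1"))

	fmt.Print(b) // Batch (2 ops, 1 internal ops) followed by one line per op
	b.Reset()    // maps are reallocated, ready for reuse
}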
@@ -0,0 +1,62 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package store | |||
type op struct { | |||
K []byte | |||
V []byte | |||
} | |||
type EmulatedBatch struct { | |||
Ops []*op | |||
Merger *EmulatedMerge | |||
} | |||
func NewEmulatedBatch(mo MergeOperator) *EmulatedBatch { | |||
return &EmulatedBatch{ | |||
Ops: make([]*op, 0, 1000), | |||
Merger: NewEmulatedMerge(mo), | |||
} | |||
} | |||
func (b *EmulatedBatch) Set(key, val []byte) { | |||
ck := make([]byte, len(key)) | |||
copy(ck, key) | |||
cv := make([]byte, len(val)) | |||
copy(cv, val) | |||
b.Ops = append(b.Ops, &op{ck, cv}) | |||
} | |||
func (b *EmulatedBatch) Delete(key []byte) { | |||
ck := make([]byte, len(key)) | |||
copy(ck, key) | |||
b.Ops = append(b.Ops, &op{ck, nil}) | |||
} | |||
func (b *EmulatedBatch) Merge(key, val []byte) { | |||
ck := make([]byte, len(key)) | |||
copy(ck, key) | |||
cv := make([]byte, len(val)) | |||
copy(cv, val) | |||
// merge the defensive copies, not the caller's buffers | |||
b.Merger.Merge(ck, cv) | |||
} | |||
func (b *EmulatedBatch) Reset() { | |||
b.Ops = b.Ops[:0] | |||
} | |||
func (b *EmulatedBatch) Close() error { | |||
return nil | |||
} |
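Set and Delete copy both key and value before appending, so callers may reuse their own buffers between operations. A sketch of that guarantee (a nil MergeOperator is assumed to be acceptable here since Merge is never called):

package main

import (
	"fmt"

	"github.com/blevesearch/bleve/index/store"
)

func main() {
	batch := store.NewEmulatedBatch(nil)

	buf := []byte("key1")
	batch.Set(buf, []byte("val1"))
	buf[0] = 'X' // safe: Set stored a copy, not this buffer

	fmt.Printf("%s\n", batch.Ops[0].K) // key1
}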
@@ -0,0 +1,85 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package boltdb | |||
import ( | |||
"bytes" | |||
"github.com/boltdb/bolt" | |||
) | |||
type Iterator struct { | |||
store *Store | |||
tx *bolt.Tx | |||
cursor *bolt.Cursor | |||
prefix []byte | |||
start []byte | |||
end []byte | |||
valid bool | |||
key []byte | |||
val []byte | |||
} | |||
func (i *Iterator) updateValid() { | |||
i.valid = (i.key != nil) | |||
if i.valid { | |||
if i.prefix != nil { | |||
i.valid = bytes.HasPrefix(i.key, i.prefix) | |||
} else if i.end != nil { | |||
i.valid = bytes.Compare(i.key, i.end) < 0 | |||
} | |||
} | |||
} | |||
func (i *Iterator) Seek(k []byte) { | |||
if i.start != nil && bytes.Compare(k, i.start) < 0 { | |||
k = i.start | |||
} | |||
if i.prefix != nil && !bytes.HasPrefix(k, i.prefix) { | |||
if bytes.Compare(k, i.prefix) < 0 { | |||
k = i.prefix | |||
} else { | |||
i.valid = false | |||
return | |||
} | |||
} | |||
i.key, i.val = i.cursor.Seek(k) | |||
i.updateValid() | |||
} | |||
func (i *Iterator) Next() { | |||
i.key, i.val = i.cursor.Next() | |||
i.updateValid() | |||
} | |||
func (i *Iterator) Current() ([]byte, []byte, bool) { | |||
return i.key, i.val, i.valid | |||
} | |||
func (i *Iterator) Key() []byte { | |||
return i.key | |||
} | |||
func (i *Iterator) Value() []byte { | |||
return i.val | |||
} | |||
func (i *Iterator) Valid() bool { | |||
return i.valid | |||
} | |||
func (i *Iterator) Close() error { | |||
return nil | |||
} |
@@ -0,0 +1,73 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package boltdb | |||
import ( | |||
"github.com/blevesearch/bleve/index/store" | |||
"github.com/boltdb/bolt" | |||
) | |||
type Reader struct { | |||
store *Store | |||
tx *bolt.Tx | |||
bucket *bolt.Bucket | |||
} | |||
func (r *Reader) Get(key []byte) ([]byte, error) { | |||
var rv []byte | |||
v := r.bucket.Get(key) | |||
if v != nil { | |||
rv = make([]byte, len(v)) | |||
copy(rv, v) | |||
} | |||
return rv, nil | |||
} | |||
func (r *Reader) MultiGet(keys [][]byte) ([][]byte, error) { | |||
return store.MultiGet(r, keys) | |||
} | |||
func (r *Reader) PrefixIterator(prefix []byte) store.KVIterator { | |||
cursor := r.bucket.Cursor() | |||
rv := &Iterator{ | |||
store: r.store, | |||
tx: r.tx, | |||
cursor: cursor, | |||
prefix: prefix, | |||
} | |||
rv.Seek(prefix) | |||
return rv | |||
} | |||
func (r *Reader) RangeIterator(start, end []byte) store.KVIterator { | |||
cursor := r.bucket.Cursor() | |||
rv := &Iterator{ | |||
store: r.store, | |||
tx: r.tx, | |||
cursor: cursor, | |||
start: start, | |||
end: end, | |||
} | |||
rv.Seek(start) | |||
return rv | |||
} | |||
func (r *Reader) Close() error { | |||
return r.tx.Rollback() | |||
} |
@@ -0,0 +1,26 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package boltdb | |||
import "encoding/json" | |||
type stats struct { | |||
s *Store | |||
} | |||
func (s *stats) MarshalJSON() ([]byte, error) { | |||
bs := s.s.db.Stats() | |||
return json.Marshal(bs) | |||
} |
@@ -0,0 +1,175 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
// Package boltdb implements a store.KVStore on top of BoltDB. It supports the | |||
// following options: | |||
// | |||
// "bucket" (string): the name of BoltDB bucket to use, defaults to "bleve". | |||
// | |||
// "nosync" (bool): if true, set boltdb.DB.NoSync to true. It speeds up index | |||
// operations in exchange of losing integrity guarantees if indexation aborts | |||
// without closing the index. Use it when rebuilding indexes from zero. | |||
package boltdb | |||
import ( | |||
"bytes" | |||
"encoding/json" | |||
"fmt" | |||
"os" | |||
"github.com/blevesearch/bleve/index/store" | |||
"github.com/blevesearch/bleve/registry" | |||
"github.com/boltdb/bolt" | |||
) | |||
const ( | |||
Name = "boltdb" | |||
defaultCompactBatchSize = 100 | |||
) | |||
type Store struct { | |||
path string | |||
bucket string | |||
db *bolt.DB | |||
noSync bool | |||
fillPercent float64 | |||
mo store.MergeOperator | |||
} | |||
func New(mo store.MergeOperator, config map[string]interface{}) (store.KVStore, error) { | |||
path, ok := config["path"].(string) | |||
if !ok { | |||
return nil, fmt.Errorf("must specify path") | |||
} | |||
if path == "" { | |||
return nil, os.ErrInvalid | |||
} | |||
bucket, ok := config["bucket"].(string) | |||
if !ok { | |||
bucket = "bleve" | |||
} | |||
noSync, _ := config["nosync"].(bool) | |||
fillPercent, ok := config["fillPercent"].(float64) | |||
if !ok { | |||
fillPercent = bolt.DefaultFillPercent | |||
} | |||
bo := &bolt.Options{} | |||
ro, ok := config["read_only"].(bool) | |||
if ok { | |||
bo.ReadOnly = ro | |||
} | |||
db, err := bolt.Open(path, 0600, bo) | |||
if err != nil { | |||
return nil, err | |||
} | |||
db.NoSync = noSync | |||
if !bo.ReadOnly { | |||
err = db.Update(func(tx *bolt.Tx) error { | |||
_, err := tx.CreateBucketIfNotExists([]byte(bucket)) | |||
return err | |||
}) | |||
if err != nil { | |||
return nil, err | |||
} | |||
} | |||
rv := Store{ | |||
path: path, | |||
bucket: bucket, | |||
db: db, | |||
mo: mo, | |||
noSync: noSync, | |||
fillPercent: fillPercent, | |||
} | |||
return &rv, nil | |||
} | |||
func (bs *Store) Close() error { | |||
return bs.db.Close() | |||
} | |||
func (bs *Store) Reader() (store.KVReader, error) { | |||
tx, err := bs.db.Begin(false) | |||
if err != nil { | |||
return nil, err | |||
} | |||
return &Reader{ | |||
store: bs, | |||
tx: tx, | |||
bucket: tx.Bucket([]byte(bs.bucket)), | |||
}, nil | |||
} | |||
func (bs *Store) Writer() (store.KVWriter, error) { | |||
return &Writer{ | |||
store: bs, | |||
}, nil | |||
} | |||
func (bs *Store) Stats() json.Marshaler { | |||
return &stats{ | |||
s: bs, | |||
} | |||
} | |||
// CompactWithBatchSize removes DictionaryTerm entries with a count of zero (in batchSize batches) | |||
// Removing entries is a workaround for github issue #374. | |||
func (bs *Store) CompactWithBatchSize(batchSize int) error { | |||
for { | |||
cnt := 0 | |||
err := bs.db.Batch(func(tx *bolt.Tx) error { | |||
c := tx.Bucket([]byte(bs.bucket)).Cursor() | |||
prefix := []byte("d") | |||
for k, v := c.Seek(prefix); bytes.HasPrefix(k, prefix); k, v = c.Next() { | |||
if bytes.Equal(v, []byte{0}) { | |||
cnt++ | |||
if err := c.Delete(); err != nil { | |||
return err | |||
} | |||
if cnt == batchSize { | |||
break | |||
} | |||
} | |||
} | |||
return nil | |||
}) | |||
if err != nil { | |||
return err | |||
} | |||
if cnt == 0 { | |||
break | |||
} | |||
} | |||
return nil | |||
} | |||
// Compact calls CompactWithBatchSize with a default batch size of 100. This is a workaround | |||
// for github issue #374. | |||
func (bs *Store) Compact() error { | |||
return bs.CompactWithBatchSize(defaultCompactBatchSize) | |||
} | |||
func init() { | |||
registry.RegisterKVStore(Name, New) | |||
} |
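These options arrive through the kvconfig map described at NewUsing earlier; bleve injects "path" itself from the index path. A sketch of a bulk-rebuild configuration (the values shown are illustrative, not recommendations):

package main

import (
	"github.com/blevesearch/bleve"
	"github.com/blevesearch/bleve/index/store/boltdb"
)

func newBulkIndex(path string) (bleve.Index, error) {
	kvconfig := map[string]interface{}{
		"bucket":      "bleve",
		"nosync":      true, // skip fsync: only safe when rebuilding from scratch
		"fillPercent": 0.9,  // denser bolt pages for mostly-sequential writes
	}
	return bleve.NewUsing(path, bleve.NewIndexMapping(),
		bleve.Config.DefaultIndexType, boltdb.Name, kvconfig)
}

func main() {
	idx, err := newBulkIndex("/tmp/bulk.bleve")
	if err != nil {
		panic(err)
	}
	defer idx.Close()
}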
@@ -0,0 +1,95 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package boltdb | |||
import ( | |||
"fmt" | |||
"github.com/blevesearch/bleve/index/store" | |||
) | |||
type Writer struct { | |||
store *Store | |||
} | |||
func (w *Writer) NewBatch() store.KVBatch { | |||
return store.NewEmulatedBatch(w.store.mo) | |||
} | |||
func (w *Writer) NewBatchEx(options store.KVBatchOptions) ([]byte, store.KVBatch, error) { | |||
return make([]byte, options.TotalBytes), w.NewBatch(), nil | |||
} | |||
func (w *Writer) ExecuteBatch(batch store.KVBatch) (err error) { | |||
emulatedBatch, ok := batch.(*store.EmulatedBatch) | |||
if !ok { | |||
return fmt.Errorf("wrong type of batch") | |||
} | |||
tx, err := w.store.db.Begin(true) | |||
if err != nil { | |||
return | |||
} | |||
// defer function to ensure that once started, | |||
// we either Commit tx or Rollback | |||
defer func() { | |||
// if nothing went wrong, commit | |||
if err == nil { | |||
// careful to catch error here too | |||
err = tx.Commit() | |||
} else { | |||
// caller should see error that caused abort, | |||
// not success or failure of Rollback itself | |||
_ = tx.Rollback() | |||
} | |||
}() | |||
bucket := tx.Bucket([]byte(w.store.bucket)) | |||
bucket.FillPercent = w.store.fillPercent | |||
for k, mergeOps := range emulatedBatch.Merger.Merges { | |||
kb := []byte(k) | |||
existingVal := bucket.Get(kb) | |||
mergedVal, fullMergeOk := w.store.mo.FullMerge(kb, existingVal, mergeOps) | |||
if !fullMergeOk { | |||
err = fmt.Errorf("merge operator returned failure") | |||
return | |||
} | |||
err = bucket.Put(kb, mergedVal) | |||
if err != nil { | |||
return | |||
} | |||
} | |||
for _, op := range emulatedBatch.Ops { | |||
if op.V != nil { | |||
err = bucket.Put(op.K, op.V) | |||
if err != nil { | |||
return | |||
} | |||
} else { | |||
err = bucket.Delete(op.K) | |||
if err != nil { | |||
return | |||
} | |||
} | |||
} | |||
return | |||
} | |||
func (w *Writer) Close() error { | |||
return nil | |||
} |
@@ -0,0 +1,152 @@ | |||
// Copyright (c) 2015 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
// Package gtreap provides an in-memory implementation of the | |||
// KVStore interfaces using the gtreap balanced-binary treap, | |||
// copy-on-write data structure. | |||
package gtreap | |||
import ( | |||
"bytes" | |||
"sync" | |||
"github.com/steveyen/gtreap" | |||
) | |||
type Iterator struct { | |||
t *gtreap.Treap | |||
m sync.Mutex | |||
cancelCh chan struct{} | |||
nextCh chan *Item | |||
curr *Item | |||
currOk bool | |||
prefix []byte | |||
start []byte | |||
end []byte | |||
} | |||
func (w *Iterator) Seek(k []byte) { | |||
if w.start != nil && bytes.Compare(k, w.start) < 0 { | |||
k = w.start | |||
} | |||
if w.prefix != nil && !bytes.HasPrefix(k, w.prefix) { | |||
if bytes.Compare(k, w.prefix) < 0 { | |||
k = w.prefix | |||
} else { | |||
var end []byte | |||
for i := len(w.prefix) - 1; i >= 0; i-- { | |||
c := w.prefix[i] | |||
if c < 0xff { | |||
end = make([]byte, i+1) | |||
copy(end, w.prefix) | |||
end[i] = c + 1 | |||
break | |||
} | |||
} | |||
k = end | |||
} | |||
} | |||
w.restart(&Item{k: k}) | |||
} | |||
func (w *Iterator) restart(start *Item) *Iterator { | |||
cancelCh := make(chan struct{}) | |||
nextCh := make(chan *Item, 1) | |||
w.m.Lock() | |||
if w.cancelCh != nil { | |||
close(w.cancelCh) | |||
} | |||
w.cancelCh = cancelCh | |||
w.nextCh = nextCh | |||
w.curr = nil | |||
w.currOk = false | |||
w.m.Unlock() | |||
go func() { | |||
if start != nil { | |||
w.t.VisitAscend(start, func(itm gtreap.Item) bool { | |||
select { | |||
case <-cancelCh: | |||
return false | |||
case nextCh <- itm.(*Item): | |||
return true | |||
} | |||
}) | |||
} | |||
close(nextCh) | |||
}() | |||
w.Next() | |||
return w | |||
} | |||
func (w *Iterator) Next() { | |||
w.m.Lock() | |||
nextCh := w.nextCh | |||
w.m.Unlock() | |||
w.curr, w.currOk = <-nextCh | |||
} | |||
func (w *Iterator) Current() ([]byte, []byte, bool) { | |||
w.m.Lock() | |||
defer w.m.Unlock() | |||
if !w.currOk || w.curr == nil { | |||
return nil, nil, false | |||
} | |||
if w.prefix != nil && !bytes.HasPrefix(w.curr.k, w.prefix) { | |||
return nil, nil, false | |||
} else if w.end != nil && bytes.Compare(w.curr.k, w.end) >= 0 { | |||
return nil, nil, false | |||
} | |||
return w.curr.k, w.curr.v, w.currOk | |||
} | |||
func (w *Iterator) Key() []byte { | |||
k, _, ok := w.Current() | |||
if !ok { | |||
return nil | |||
} | |||
return k | |||
} | |||
func (w *Iterator) Value() []byte { | |||
_, v, ok := w.Current() | |||
if !ok { | |||
return nil | |||
} | |||
return v | |||
} | |||
func (w *Iterator) Valid() bool { | |||
_, _, ok := w.Current() | |||
return ok | |||
} | |||
func (w *Iterator) Close() error { | |||
w.m.Lock() | |||
if w.cancelCh != nil { | |||
close(w.cancelCh) | |||
} | |||
w.cancelCh = nil | |||
w.nextCh = nil | |||
w.curr = nil | |||
w.currOk = false | |||
w.m.Unlock() | |||
return nil | |||
} |
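The byte-twiddling inside Seek computes the successor of the prefix: the smallest key ordered after every key sharing that prefix, found by incrementing the last byte that is below 0xff. Pulled out as a standalone (hypothetical) helper for clarity:

package main

import "fmt"

// prefixSuccessor mirrors the loop in Iterator.Seek: it returns the
// smallest byte slice ordered after every key that starts with prefix,
// or nil when no such bound exists (prefix is all 0xff bytes).
func prefixSuccessor(prefix []byte) []byte {
	for i := len(prefix) - 1; i >= 0; i-- {
		if prefix[i] < 0xff {
			end := make([]byte, i+1)
			copy(end, prefix)
			end[i]++
			return end
		}
	}
	return nil
}

func main() {
	fmt.Printf("%q\n", prefixSuccessor([]byte("abc")))     // "abd"
	fmt.Printf("%q\n", prefixSuccessor([]byte{'a', 0xff})) // "b"
}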
@@ -0,0 +1,66 @@ | |||
// Copyright (c) 2015 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
// Package gtreap provides an in-memory implementation of the | |||
// KVStore interfaces using the gtreap balanced-binary treap, | |||
// copy-on-write data structure. | |||
package gtreap | |||
import ( | |||
"github.com/blevesearch/bleve/index/store" | |||
"github.com/steveyen/gtreap" | |||
) | |||
type Reader struct { | |||
t *gtreap.Treap | |||
} | |||
func (r *Reader) Get(k []byte) ([]byte, error) { | |||
itm := r.t.Get(&Item{k: k}) | |||
if itm != nil { | |||
rv := make([]byte, len(itm.(*Item).v)) | |||
copy(rv, itm.(*Item).v) | |||
return rv, nil | |||
} | |||
return nil, nil | |||
} | |||
func (r *Reader) MultiGet(keys [][]byte) ([][]byte, error) { | |||
return store.MultiGet(r, keys) | |||
} | |||
func (r *Reader) PrefixIterator(k []byte) store.KVIterator { | |||
rv := Iterator{ | |||
t: r.t, | |||
prefix: k, | |||
} | |||
rv.restart(&Item{k: k}) | |||
return &rv | |||
} | |||
func (r *Reader) RangeIterator(start, end []byte) store.KVIterator { | |||
rv := Iterator{ | |||
t: r.t, | |||
start: start, | |||
end: end, | |||
} | |||
rv.restart(&Item{k: start}) | |||
return &rv | |||
} | |||
func (r *Reader) Close() error { | |||
return nil | |||
} | |||
@@ -0,0 +1,82 @@ | |||
// Copyright (c) 2015 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
// Package gtreap provides an in-memory implementation of the | |||
// KVStore interfaces using the gtreap balanced-binary treap, | |||
// copy-on-write data structure. | |||
package gtreap | |||
import ( | |||
"bytes" | |||
"fmt" | |||
"os" | |||
"sync" | |||
"github.com/blevesearch/bleve/index/store" | |||
"github.com/blevesearch/bleve/registry" | |||
"github.com/steveyen/gtreap" | |||
) | |||
const Name = "gtreap" | |||
type Store struct { | |||
m sync.Mutex | |||
t *gtreap.Treap | |||
mo store.MergeOperator | |||
} | |||
type Item struct { | |||
k []byte | |||
v []byte | |||
} | |||
func itemCompare(a, b interface{}) int { | |||
return bytes.Compare(a.(*Item).k, b.(*Item).k) | |||
} | |||
func New(mo store.MergeOperator, config map[string]interface{}) (store.KVStore, error) { | |||
path, ok := config["path"].(string) | |||
if !ok { | |||
return nil, fmt.Errorf("must specify path") | |||
} | |||
if path != "" { | |||
return nil, os.ErrInvalid | |||
} | |||
rv := Store{ | |||
t: gtreap.NewTreap(itemCompare), | |||
mo: mo, | |||
} | |||
return &rv, nil | |||
} | |||
func (s *Store) Close() error { | |||
return nil | |||
} | |||
func (s *Store) Reader() (store.KVReader, error) { | |||
s.m.Lock() | |||
t := s.t | |||
s.m.Unlock() | |||
return &Reader{t: t}, nil | |||
} | |||
func (s *Store) Writer() (store.KVWriter, error) { | |||
return &Writer{s: s}, nil | |||
} | |||
func init() { | |||
registry.RegisterKVStore(Name, New) | |||
} |
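Because the treap is copy-on-write, Reader() just captures the current root pointer under the mutex; later writes replace s.t without disturbing the snapshot. A runnable sketch of that isolation:

package main

import (
	"fmt"

	"github.com/blevesearch/bleve/index/store/gtreap"
)

func main() {
	kv, err := gtreap.New(nil, map[string]interface{}{"path": ""})
	if err != nil {
		panic(err)
	}
	defer kv.Close()

	before, _ := kv.Reader() // snapshot of the empty treap

	w, _ := kv.Writer()
	batch := w.NewBatch()
	batch.Set([]byte("k"), []byte("v"))
	if err := w.ExecuteBatch(batch); err != nil {
		panic(err)
	}

	v, _ := before.Get([]byte("k"))
	fmt.Println(v == nil) // true: the old snapshot never sees the write

	after, _ := kv.Reader()
	v, _ = after.Get([]byte("k"))
	fmt.Printf("%s\n", v) // v
}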
@@ -0,0 +1,76 @@ | |||
// Copyright (c) 2015 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
// Package gtreap provides an in-memory implementation of the | |||
// KVStore interfaces using the gtreap balanced-binary treap, | |||
// copy-on-write data structure. | |||
package gtreap | |||
import ( | |||
"fmt" | |||
"math/rand" | |||
"github.com/blevesearch/bleve/index/store" | |||
) | |||
type Writer struct { | |||
s *Store | |||
} | |||
func (w *Writer) NewBatch() store.KVBatch { | |||
return store.NewEmulatedBatch(w.s.mo) | |||
} | |||
func (w *Writer) NewBatchEx(options store.KVBatchOptions) ([]byte, store.KVBatch, error) { | |||
return make([]byte, options.TotalBytes), w.NewBatch(), nil | |||
} | |||
func (w *Writer) ExecuteBatch(batch store.KVBatch) error { | |||
emulatedBatch, ok := batch.(*store.EmulatedBatch) | |||
if !ok { | |||
return fmt.Errorf("wrong type of batch") | |||
} | |||
w.s.m.Lock() | |||
// unlock via defer so the early return on merge failure | |||
// cannot leave the store mutex held | |||
defer w.s.m.Unlock() | |||
for k, mergeOps := range emulatedBatch.Merger.Merges { | |||
kb := []byte(k) | |||
var existingVal []byte | |||
existingItem := w.s.t.Get(&Item{k: kb}) | |||
if existingItem != nil { | |||
existingVal = existingItem.(*Item).v // reuse the item already fetched | |||
} | |||
mergedVal, fullMergeOk := w.s.mo.FullMerge(kb, existingVal, mergeOps) | |||
if !fullMergeOk { | |||
return fmt.Errorf("merge operator returned failure") | |||
} | |||
w.s.t = w.s.t.Upsert(&Item{k: kb, v: mergedVal}, rand.Int()) | |||
} | |||
for _, op := range emulatedBatch.Ops { | |||
if op.V != nil { | |||
w.s.t = w.s.t.Upsert(&Item{k: op.K, v: op.V}, rand.Int()) | |||
} else { | |||
w.s.t = w.s.t.Delete(&Item{k: op.K}) | |||
} | |||
} | |||
return nil | |||
} | |||
func (w *Writer) Close() error { | |||
w.s = nil | |||
return nil | |||
} |
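// Editorial sketch (not upstream code; kvs, mo and delta are placeholders): | |||
// ExecuteBatch applies merges first, then the set/delete ops, all while | |||
// holding s.m, so concurrent batches serialize and readers only ever see a | |||
// fully applied root: | |||
// | |||
//	kvs, _ := New(mo, map[string]interface{}{"path": ""}) | |||
//	w, _ := kvs.Writer() | |||
//	b := w.NewBatch() | |||
//	b.Set([]byte("a"), []byte("1")) | |||
//	b.Merge([]byte("n"), delta) // collected by the EmulatedBatch | |||
//	b.Delete([]byte("old")) | |||
//	_ = w.ExecuteBatch(b) // FullMerge decides the final value of "n" | |||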
@@ -0,0 +1,174 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package store | |||
import "encoding/json" | |||
// KVStore is an abstraction for working with KV stores. Note that | |||
// in order to be used with the bleve.registry, it must also implement | |||
// a constructor function of the registry.KVStoreConstructor type. | |||
type KVStore interface { | |||
// Writer returns a KVWriter which can be used to | |||
// make changes to the KVStore. If a writer cannot | |||
// be obtained a non-nil error is returned. | |||
Writer() (KVWriter, error) | |||
// Reader returns a KVReader which can be used to | |||
// read data from the KVStore. If a reader cannot | |||
// be obtained a non-nil error is returned. | |||
Reader() (KVReader, error) | |||
// Close closes the KVStore | |||
Close() error | |||
} | |||
// KVReader is an abstraction of an **ISOLATED** reader. | |||
// In this context, isolated is defined to mean that | |||
// writes/deletes made after the KVReader is opened | |||
// are not observed. | |||
// Because there is usually a cost associated with | |||
// keeping isolated readers active, users should | |||
// close them as soon as they are no longer needed. | |||
type KVReader interface { | |||
// Get returns the value associated with the key | |||
// If the key does not exist, nil is returned. | |||
// The caller owns the bytes returned. | |||
Get(key []byte) ([]byte, error) | |||
// MultiGet retrieves multiple values in one call. | |||
MultiGet(keys [][]byte) ([][]byte, error) | |||
// PrefixIterator returns a KVIterator that will | |||
// visit all K/V pairs with the provided prefix | |||
PrefixIterator(prefix []byte) KVIterator | |||
// RangeIterator returns a KVIterator that will | |||
// visit all K/V pairs >= start AND < end | |||
RangeIterator(start, end []byte) KVIterator | |||
// Close closes the reader | |||
Close() error | |||
} | |||
// KVIterator is an abstraction around key iteration | |||
type KVIterator interface { | |||
// Seek will advance the iterator to the specified key | |||
Seek(key []byte) | |||
// Next will advance the iterator to the next key | |||
Next() | |||
// Key returns the key pointed to by the iterator | |||
// The bytes returned are **ONLY** valid until the next call to Seek/Next/Close | |||
// Continued use after that requires that they be copied. | |||
Key() []byte | |||
// Value returns the value pointed to by the iterator | |||
// The bytes returned are **ONLY** valid until the next call to Seek/Next/Close | |||
// Continued use after that requires that they be copied. | |||
Value() []byte | |||
// Valid returns whether or not the iterator is in a valid state | |||
Valid() bool | |||
// Current returns Key(),Value(),Valid() in a single operation | |||
Current() ([]byte, []byte, bool) | |||
// Close closes the iterator | |||
Close() error | |||
} | |||
// KVWriter is an abstraction for mutating the KVStore. | |||
// KVWriter does **NOT** enforce a single-writer restriction; | |||
// if the underlying KVStore allows concurrent writes, the | |||
// KVWriter interface should allow them as well, and it is up to | |||
// the caller to use this in a way that is safe and makes sense. | |||
type KVWriter interface { | |||
// NewBatch returns a KVBatch for performing batch operations on this kvstore | |||
NewBatch() KVBatch | |||
// NewBatchEx returns a KVBatch and an associated byte array | |||
// that's pre-sized based on the KVBatchOptions. The caller can | |||
// use the returned byte array for keys and values associated with | |||
// the batch. Once the batch is either executed or closed, the | |||
// associated byte array should no longer be accessed by the | |||
// caller. | |||
NewBatchEx(KVBatchOptions) ([]byte, KVBatch, error) | |||
// ExecuteBatch will execute the KVBatch, the provided KVBatch **MUST** have | |||
// been created by the same KVStore (though not necessarily the same KVWriter) | |||
// Batch execution is atomic, either all the operations or none will be performed | |||
ExecuteBatch(batch KVBatch) error | |||
// Close closes the writer | |||
Close() error | |||
} | |||
// KVBatchOptions provides the KVWriter.NewBatchEx() method with batch | |||
// preparation and preallocation information. | |||
type KVBatchOptions struct { | |||
// TotalBytes is the sum of key and value bytes needed by the | |||
// caller for the entire batch. It affects the size of the | |||
// returned byte array of KVWriter.NewBatchEx(). | |||
TotalBytes int | |||
// NumSets is the number of Set() calls the caller will invoke on | |||
// the KVBatch. | |||
NumSets int | |||
// NumDeletes is the number of Delete() calls the caller will invoke | |||
// on the KVBatch. | |||
NumDeletes int | |||
// NumMerges is the number of Merge() calls the caller will invoke | |||
// on the KVBatch. | |||
NumMerges int | |||
} | |||
// KVBatch is an abstraction for making multiple KV mutations at once | |||
type KVBatch interface { | |||
// Set updates the key with the specified value | |||
// both key and value []byte may be reused as soon as this call returns | |||
Set(key, val []byte) | |||
// Delete removes the specified key | |||
// the key []byte may be reused as soon as this call returns | |||
Delete(key []byte) | |||
// Merge merges old value with the new value at the specified key | |||
// as prescribed by the KVStore's merge operator | |||
// both key and value []byte may be reused as soon as this call returns | |||
Merge(key, val []byte) | |||
// Reset frees resources for this batch and allows reuse | |||
Reset() | |||
// Close frees resources | |||
Close() error | |||
} | |||
// KVStoreStats is an optional interface that KVStores can implement | |||
// if they're able to report any useful stats | |||
type KVStoreStats interface { | |||
// Stats returns a JSON serializable object representing stats for this KVStore | |||
Stats() json.Marshaler | |||
StatsMap() map[string]interface{} | |||
} |
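// Editorial sketch (not upstream code; reader and process are placeholders): | |||
// the canonical loop over a KVIterator, copying any bytes that must survive | |||
// the next Seek/Next/Close: | |||
// | |||
//	it := reader.PrefixIterator([]byte{'t'}) | |||
//	defer func() { _ = it.Close() }() | |||
//	for k, v, valid := it.Current(); valid; k, v, valid = it.Current() { | |||
//		key := append([]byte(nil), k...) // invalidated by Next otherwise | |||
//		val := append([]byte(nil), v...) | |||
//		process(key, val) | |||
//		it.Next() | |||
//	} | |||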
@@ -0,0 +1,64 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package store | |||
// MergeOperator is the interface a KVStore uses to apply deferred | |||
// merge operations. At the moment this happens to be the same interface | |||
// as described by RocksDB, but this may not always be the case. | |||
type MergeOperator interface { | |||
// FullMerge the full sequence of operands on top of the existingValue | |||
// if no value currently exists, existingValue is nil | |||
// return the merged value, and success/failure | |||
FullMerge(key, existingValue []byte, operands [][]byte) ([]byte, bool) | |||
// Partially merge these two operands. | |||
// If partial merge cannot be done, return nil,false, which will defer | |||
// all processing until the FullMerge is done. | |||
PartialMerge(key, leftOperand, rightOperand []byte) ([]byte, bool) | |||
// Name returns an identifier for the operator | |||
Name() string | |||
} | |||
type EmulatedMerge struct { | |||
Merges map[string][][]byte | |||
mo MergeOperator | |||
} | |||
func NewEmulatedMerge(mo MergeOperator) *EmulatedMerge { | |||
return &EmulatedMerge{ | |||
Merges: make(map[string][][]byte), | |||
mo: mo, | |||
} | |||
} | |||
func (m *EmulatedMerge) Merge(key, val []byte) { | |||
ops, ok := m.Merges[string(key)] | |||
if ok && len(ops) > 0 { | |||
last := ops[len(ops)-1] | |||
mergedVal, partialMergeOk := m.mo.PartialMerge(key, last, val) | |||
if partialMergeOk { | |||
// replace last entry with the result of the merge | |||
ops[len(ops)-1] = mergedVal | |||
} else { | |||
// could not partial merge, append this to the end | |||
ops = append(ops, val) | |||
} | |||
} else { | |||
ops = [][]byte{val} | |||
} | |||
m.Merges[string(key)] = ops | |||
} |
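// Editorial sketch (not upstream code; assumes encoding/binary): a minimal | |||
// MergeOperator treating each operand as an 8-byte little-endian delta to | |||
// a uint64 counter. PartialMerge folds adjacent operands so EmulatedMerge | |||
// keeps the per-key operand list short; FullMerge applies whatever remains: | |||
// | |||
//	type addMerge struct{} | |||
// | |||
//	func (addMerge) FullMerge(key, existing []byte, operands [][]byte) ([]byte, bool) { | |||
//		var n uint64 | |||
//		if len(existing) == 8 { | |||
//			n = binary.LittleEndian.Uint64(existing) | |||
//		} | |||
//		for _, op := range operands { | |||
//			n += binary.LittleEndian.Uint64(op) | |||
//		} | |||
//		out := make([]byte, 8) | |||
//		binary.LittleEndian.PutUint64(out, n) | |||
//		return out, true | |||
//	} | |||
// | |||
//	func (addMerge) PartialMerge(key, left, right []byte) ([]byte, bool) { | |||
//		out := make([]byte, 8) | |||
//		binary.LittleEndian.PutUint64(out, binary.LittleEndian.Uint64(left)+binary.LittleEndian.Uint64(right)) | |||
//		return out, true | |||
//	} | |||
// | |||
//	func (addMerge) Name() string { return "add" } | |||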
@@ -0,0 +1,33 @@ | |||
// Copyright (c) 2016 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package store | |||
// MultiGet is a helper function to retrieve multiple keys from a | |||
// KVReader, and might be used by KVStore implementations that don't | |||
// have a native multi-get facility. | |||
func MultiGet(kvreader KVReader, keys [][]byte) ([][]byte, error) { | |||
vals := make([][]byte, len(keys)) | |||
for i, key := range keys { | |||
val, err := kvreader.Get(key) | |||
if err != nil { | |||
return nil, err | |||
} | |||
vals[i] = val | |||
} | |||
return vals, nil | |||
} |
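// Editorial note: results are positional, so a key absent from the store | |||
// contributes a nil entry rather than shrinking the slice: | |||
// | |||
//	vals, err := MultiGet(r, [][]byte{[]byte("present"), []byte("missing")}) | |||
//	// err == nil, vals[0] == the stored value, vals[1] == nil | |||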
@@ -0,0 +1,110 @@ | |||
// Copyright (c) 2015 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package upsidedown | |||
import ( | |||
"github.com/blevesearch/bleve/analysis" | |||
"github.com/blevesearch/bleve/document" | |||
"github.com/blevesearch/bleve/index" | |||
) | |||
func (udc *UpsideDownCouch) Analyze(d *document.Document) *index.AnalysisResult { | |||
rv := &index.AnalysisResult{ | |||
DocID: d.ID, | |||
Rows: make([]index.IndexRow, 0, 100), | |||
} | |||
docIDBytes := []byte(d.ID) | |||
// track our back index entries | |||
backIndexStoredEntries := make([]*BackIndexStoreEntry, 0) | |||
// information we collate as we merge fields with same name | |||
fieldTermFreqs := make(map[uint16]analysis.TokenFrequencies) | |||
fieldLengths := make(map[uint16]int) | |||
fieldIncludeTermVectors := make(map[uint16]bool) | |||
fieldNames := make(map[uint16]string) | |||
analyzeField := func(field document.Field, storable bool) { | |||
fieldIndex, newFieldRow := udc.fieldIndexOrNewRow(field.Name()) | |||
if newFieldRow != nil { | |||
rv.Rows = append(rv.Rows, newFieldRow) | |||
} | |||
fieldNames[fieldIndex] = field.Name() | |||
if field.Options().IsIndexed() { | |||
fieldLength, tokenFreqs := field.Analyze() | |||
existingFreqs := fieldTermFreqs[fieldIndex] | |||
if existingFreqs == nil { | |||
fieldTermFreqs[fieldIndex] = tokenFreqs | |||
} else { | |||
existingFreqs.MergeAll(field.Name(), tokenFreqs) | |||
fieldTermFreqs[fieldIndex] = existingFreqs | |||
} | |||
fieldLengths[fieldIndex] += fieldLength | |||
fieldIncludeTermVectors[fieldIndex] = field.Options().IncludeTermVectors() | |||
} | |||
if storable && field.Options().IsStored() { | |||
rv.Rows, backIndexStoredEntries = udc.storeField(docIDBytes, field, fieldIndex, rv.Rows, backIndexStoredEntries) | |||
} | |||
} | |||
// walk all the fields, record stored fields now | |||
// place information about indexed fields into map | |||
// this collates information across fields with | |||
// same names (arrays) | |||
for _, field := range d.Fields { | |||
analyzeField(field, true) | |||
} | |||
if len(d.CompositeFields) > 0 { | |||
for fieldIndex, tokenFreqs := range fieldTermFreqs { | |||
// see if any of the composite fields need this | |||
for _, compositeField := range d.CompositeFields { | |||
compositeField.Compose(fieldNames[fieldIndex], fieldLengths[fieldIndex], tokenFreqs) | |||
} | |||
} | |||
for _, compositeField := range d.CompositeFields { | |||
analyzeField(compositeField, false) | |||
} | |||
} | |||
rowsCapNeeded := len(rv.Rows) + 1 | |||
for _, tokenFreqs := range fieldTermFreqs { | |||
rowsCapNeeded += len(tokenFreqs) | |||
} | |||
rv.Rows = append(make([]index.IndexRow, 0, rowsCapNeeded), rv.Rows...) | |||
backIndexTermEntries := make([]*BackIndexTermEntry, 0, rowsCapNeeded) | |||
// walk through the collated information and process | |||
// once for each indexed field (unique name) | |||
for fieldIndex, tokenFreqs := range fieldTermFreqs { | |||
fieldLength := fieldLengths[fieldIndex] | |||
includeTermVectors := fieldIncludeTermVectors[fieldIndex] | |||
// encode this field | |||
rv.Rows, backIndexTermEntries = udc.indexField(docIDBytes, includeTermVectors, fieldIndex, fieldLength, tokenFreqs, rv.Rows, backIndexTermEntries) | |||
} | |||
// build the back index row | |||
backIndexRow := NewBackIndexRow(docIDBytes, backIndexTermEntries, backIndexStoredEntries) | |||
rv.Rows = append(rv.Rows, backIndexRow) | |||
return rv | |||
} |
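// Editorial sketch (not upstream code; udc is a placeholder and | |||
// document.NewTextField is assumed to take (name, arrayPositions, value)): | |||
// fields sharing a name (array values in the source document) are collated | |||
// into a single fieldTermFreqs entry above, so their lengths add and their | |||
// token frequencies merge before indexing: | |||
// | |||
//	doc := document.NewDocument("doc1") | |||
//	doc.AddField(document.NewTextField("tags", []uint64{0}, []byte("red"))) | |||
//	doc.AddField(document.NewTextField("tags", []uint64{1}, []byte("blue"))) | |||
//	result := udc.Analyze(doc) // one collated term set for "tags" | |||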
@@ -0,0 +1,8 @@ | |||
#!/bin/sh | |||
BENCHMARKS=`grep "func Benchmark" *_test.go | sed 's/.*func //' | sed 's/(.*{//'` | |||
for BENCHMARK in $BENCHMARKS | |||
do | |||
go test -v -run=xxx -bench=^$BENCHMARK$ -benchtime=10s -tags 'forestdb leveldb' | grep -v ok | grep -v PASS | |||
done |
@@ -0,0 +1,172 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package upsidedown | |||
import ( | |||
"bytes" | |||
"sort" | |||
"github.com/blevesearch/bleve/index/store" | |||
) | |||
// the functions in this file are only intended to be used by | |||
// the bleve_dump utility and the debug http handlers | |||
// if your application relies on them, you're doing something wrong | |||
// they may change or be removed at any time | |||
func dumpPrefix(kvreader store.KVReader, rv chan interface{}, prefix []byte) { | |||
start := prefix | |||
if start == nil { | |||
start = []byte{0} | |||
} | |||
it := kvreader.PrefixIterator(start) | |||
defer func() { | |||
cerr := it.Close() | |||
if cerr != nil { | |||
rv <- cerr | |||
} | |||
}() | |||
key, val, valid := it.Current() | |||
for valid { | |||
ck := make([]byte, len(key)) | |||
copy(ck, key) | |||
cv := make([]byte, len(val)) | |||
copy(cv, val) | |||
row, err := ParseFromKeyValue(ck, cv) | |||
if err != nil { | |||
rv <- err | |||
return | |||
} | |||
rv <- row | |||
it.Next() | |||
key, val, valid = it.Current() | |||
} | |||
} | |||
func dumpRange(kvreader store.KVReader, rv chan interface{}, start, end []byte) { | |||
it := kvreader.RangeIterator(start, end) | |||
defer func() { | |||
cerr := it.Close() | |||
if cerr != nil { | |||
rv <- cerr | |||
} | |||
}() | |||
key, val, valid := it.Current() | |||
for valid { | |||
ck := make([]byte, len(key)) | |||
copy(ck, key) | |||
cv := make([]byte, len(val)) | |||
copy(cv, val) | |||
row, err := ParseFromKeyValue(ck, cv) | |||
if err != nil { | |||
rv <- err | |||
return | |||
} | |||
rv <- row | |||
it.Next() | |||
key, val, valid = it.Current() | |||
} | |||
} | |||
func (i *IndexReader) DumpAll() chan interface{} { | |||
rv := make(chan interface{}) | |||
go func() { | |||
defer close(rv) | |||
dumpRange(i.kvreader, rv, nil, nil) | |||
}() | |||
return rv | |||
} | |||
func (i *IndexReader) DumpFields() chan interface{} { | |||
rv := make(chan interface{}) | |||
go func() { | |||
defer close(rv) | |||
dumpPrefix(i.kvreader, rv, []byte{'f'}) | |||
}() | |||
return rv | |||
} | |||
type keyset [][]byte | |||
func (k keyset) Len() int { return len(k) } | |||
func (k keyset) Swap(i, j int) { k[i], k[j] = k[j], k[i] } | |||
func (k keyset) Less(i, j int) bool { return bytes.Compare(k[i], k[j]) < 0 } | |||
// DumpDoc returns all rows in the index related to this doc id | |||
func (i *IndexReader) DumpDoc(id string) chan interface{} { | |||
idBytes := []byte(id) | |||
rv := make(chan interface{}) | |||
go func() { | |||
defer close(rv) | |||
back, err := backIndexRowForDoc(i.kvreader, []byte(id)) | |||
if err != nil { | |||
rv <- err | |||
return | |||
} | |||
// no such doc | |||
if back == nil { | |||
return | |||
} | |||
// build sorted list of term keys | |||
keys := make(keyset, 0) | |||
for _, entry := range back.termEntries { | |||
tfr := NewTermFrequencyRow([]byte(*entry.Term), uint16(*entry.Field), idBytes, 0, 0) | |||
key := tfr.Key() | |||
keys = append(keys, key) | |||
} | |||
sort.Sort(keys) | |||
// first add all the stored rows | |||
storedRowPrefix := NewStoredRow(idBytes, 0, []uint64{}, 'x', []byte{}).ScanPrefixForDoc() | |||
dumpPrefix(i.kvreader, rv, storedRowPrefix) | |||
// now walk term keys in order and add them as well | |||
if len(keys) > 0 { | |||
it := i.kvreader.RangeIterator(keys[0], nil) | |||
defer func() { | |||
cerr := it.Close() | |||
if cerr != nil { | |||
rv <- cerr | |||
} | |||
}() | |||
for _, key := range keys { | |||
it.Seek(key) | |||
rkey, rval, valid := it.Current() | |||
if !valid { | |||
break | |||
} | |||
rck := make([]byte, len(rkey)) | |||
copy(rck, rkey) | |||
rcv := make([]byte, len(rval)) | |||
copy(rcv, rval) | |||
row, err := ParseFromKeyValue(rck, rcv) | |||
if err != nil { | |||
rv <- err | |||
return | |||
} | |||
rv <- row | |||
} | |||
} | |||
}() | |||
return rv | |||
} |
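// Editorial sketch (not upstream code; indexReader is a placeholder): rows | |||
// and errors share the dump channel, so a consumer type-switches on each | |||
// item: | |||
// | |||
//	for item := range indexReader.DumpDoc("doc1") { | |||
//		switch v := item.(type) { | |||
//		case error: | |||
//			fmt.Printf("dump failed: %v\n", v) | |||
//		case UpsideDownCouchRow: | |||
//			fmt.Println(v) | |||
//		} | |||
//	} | |||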
@@ -0,0 +1,78 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package upsidedown | |||
import ( | |||
"fmt" | |||
"github.com/blevesearch/bleve/index" | |||
"github.com/blevesearch/bleve/index/store" | |||
) | |||
type UpsideDownCouchFieldDict struct { | |||
indexReader *IndexReader | |||
iterator store.KVIterator | |||
dictRow *DictionaryRow | |||
dictEntry *index.DictEntry | |||
field uint16 | |||
} | |||
func newUpsideDownCouchFieldDict(indexReader *IndexReader, field uint16, startTerm, endTerm []byte) (*UpsideDownCouchFieldDict, error) { | |||
startKey := NewDictionaryRow(startTerm, field, 0).Key() | |||
if endTerm == nil { | |||
endTerm = []byte{ByteSeparator} | |||
} else { | |||
endTerm = incrementBytes(endTerm) | |||
} | |||
endKey := NewDictionaryRow(endTerm, field, 0).Key() | |||
it := indexReader.kvreader.RangeIterator(startKey, endKey) | |||
return &UpsideDownCouchFieldDict{ | |||
indexReader: indexReader, | |||
iterator: it, | |||
dictRow: &DictionaryRow{}, // Pre-alloced, reused row. | |||
dictEntry: &index.DictEntry{}, // Pre-alloced, reused entry. | |||
field: field, | |||
}, nil | |||
} | |||
func (r *UpsideDownCouchFieldDict) Next() (*index.DictEntry, error) { | |||
key, val, valid := r.iterator.Current() | |||
if !valid { | |||
return nil, nil | |||
} | |||
err := r.dictRow.parseDictionaryK(key) | |||
if err != nil { | |||
return nil, fmt.Errorf("unexpected error parsing dictionary row key: %v", err) | |||
} | |||
err = r.dictRow.parseDictionaryV(val) | |||
if err != nil { | |||
return nil, fmt.Errorf("unexpected error parsing dictionary row val: %v", err) | |||
} | |||
r.dictEntry.Term = string(r.dictRow.term) | |||
r.dictEntry.Count = r.dictRow.count | |||
// advance the iterator to the next term | |||
r.iterator.Next() | |||
return r.dictEntry, nil | |||
} | |||
func (r *UpsideDownCouchFieldDict) Close() error { | |||
return r.iterator.Close() | |||
} |
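// Editorial sketch (not upstream code; indexReader is a placeholder): | |||
// Next returns (nil, nil) once the range is exhausted, and the returned | |||
// *index.DictEntry is the pre-allocated one reused between calls, so copy | |||
// anything you keep: | |||
// | |||
//	fd, err := indexReader.FieldDict("title") | |||
//	if err != nil { | |||
//		return err | |||
//	} | |||
//	defer func() { _ = fd.Close() }() | |||
//	for de, nerr := fd.Next(); de != nil && nerr == nil; de, nerr = fd.Next() { | |||
//		fmt.Printf("%s (%d)\n", de.Term, de.Count) | |||
//	} | |||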
@@ -0,0 +1,189 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package upsidedown | |||
import ( | |||
"github.com/blevesearch/bleve/document" | |||
"github.com/blevesearch/bleve/index" | |||
"github.com/blevesearch/bleve/index/store" | |||
) | |||
type IndexReader struct { | |||
index *UpsideDownCouch | |||
kvreader store.KVReader | |||
docCount uint64 | |||
} | |||
func (i *IndexReader) TermFieldReader(term []byte, fieldName string, includeFreq, includeNorm, includeTermVectors bool) (index.TermFieldReader, error) { | |||
fieldIndex, fieldExists := i.index.fieldCache.FieldNamed(fieldName, false) | |||
if fieldExists { | |||
return newUpsideDownCouchTermFieldReader(i, term, uint16(fieldIndex), includeFreq, includeNorm, includeTermVectors) | |||
} | |||
return newUpsideDownCouchTermFieldReader(i, []byte{ByteSeparator}, ^uint16(0), includeFreq, includeNorm, includeTermVectors) | |||
} | |||
func (i *IndexReader) FieldDict(fieldName string) (index.FieldDict, error) { | |||
return i.FieldDictRange(fieldName, nil, nil) | |||
} | |||
func (i *IndexReader) FieldDictRange(fieldName string, startTerm []byte, endTerm []byte) (index.FieldDict, error) { | |||
fieldIndex, fieldExists := i.index.fieldCache.FieldNamed(fieldName, false) | |||
if fieldExists { | |||
return newUpsideDownCouchFieldDict(i, uint16(fieldIndex), startTerm, endTerm) | |||
} | |||
return newUpsideDownCouchFieldDict(i, ^uint16(0), []byte{ByteSeparator}, []byte{}) | |||
} | |||
func (i *IndexReader) FieldDictPrefix(fieldName string, termPrefix []byte) (index.FieldDict, error) { | |||
return i.FieldDictRange(fieldName, termPrefix, termPrefix) | |||
} | |||
func (i *IndexReader) DocIDReaderAll() (index.DocIDReader, error) { | |||
return newUpsideDownCouchDocIDReader(i) | |||
} | |||
func (i *IndexReader) DocIDReaderOnly(ids []string) (index.DocIDReader, error) { | |||
return newUpsideDownCouchDocIDReaderOnly(i, ids) | |||
} | |||
func (i *IndexReader) Document(id string) (doc *document.Document, err error) { | |||
// first hit the back index to confirm doc exists | |||
var backIndexRow *BackIndexRow | |||
backIndexRow, err = backIndexRowForDoc(i.kvreader, []byte(id)) | |||
if err != nil { | |||
return | |||
} | |||
if backIndexRow == nil { | |||
return | |||
} | |||
doc = document.NewDocument(id) | |||
storedRow := NewStoredRow([]byte(id), 0, []uint64{}, 'x', nil) | |||
storedRowScanPrefix := storedRow.ScanPrefixForDoc() | |||
it := i.kvreader.PrefixIterator(storedRowScanPrefix) | |||
defer func() { | |||
if cerr := it.Close(); err == nil && cerr != nil { | |||
err = cerr | |||
} | |||
}() | |||
key, val, valid := it.Current() | |||
for valid { | |||
safeVal := make([]byte, len(val)) | |||
copy(safeVal, val) | |||
var row *StoredRow | |||
row, err = NewStoredRowKV(key, safeVal) | |||
if err != nil { | |||
doc = nil | |||
return | |||
} | |||
if row != nil { | |||
fieldName := i.index.fieldCache.FieldIndexed(row.field) | |||
field := decodeFieldType(row.typ, fieldName, row.arrayPositions, row.value) | |||
if field != nil { | |||
doc.AddField(field) | |||
} | |||
} | |||
it.Next() | |||
key, val, valid = it.Current() | |||
} | |||
return | |||
} | |||
func (i *IndexReader) DocumentFieldTerms(id index.IndexInternalID, fields []string) (index.FieldTerms, error) { | |||
back, err := backIndexRowForDoc(i.kvreader, id) | |||
if err != nil { | |||
return nil, err | |||
} | |||
if back == nil { | |||
return nil, nil | |||
} | |||
rv := make(index.FieldTerms, len(fields)) | |||
fieldsMap := make(map[uint16]string, len(fields)) | |||
for _, f := range fields { | |||
id, ok := i.index.fieldCache.FieldNamed(f, false) | |||
if ok { | |||
fieldsMap[id] = f | |||
} | |||
} | |||
for _, entry := range back.termEntries { | |||
if field, ok := fieldsMap[uint16(*entry.Field)]; ok { | |||
rv[field] = append(rv[field], *entry.Term) | |||
} | |||
} | |||
return rv, nil | |||
} | |||
func (i *IndexReader) Fields() (fields []string, err error) { | |||
fields = make([]string, 0) | |||
it := i.kvreader.PrefixIterator([]byte{'f'}) | |||
defer func() { | |||
if cerr := it.Close(); err == nil && cerr != nil { | |||
err = cerr | |||
} | |||
}() | |||
key, val, valid := it.Current() | |||
for valid { | |||
var row UpsideDownCouchRow | |||
row, err = ParseFromKeyValue(key, val) | |||
if err != nil { | |||
fields = nil | |||
return | |||
} | |||
if row != nil { | |||
fieldRow, ok := row.(*FieldRow) | |||
if ok { | |||
fields = append(fields, fieldRow.name) | |||
} | |||
} | |||
it.Next() | |||
key, val, valid = it.Current() | |||
} | |||
return | |||
} | |||
func (i *IndexReader) GetInternal(key []byte) ([]byte, error) { | |||
internalRow := NewInternalRow(key, nil) | |||
return i.kvreader.Get(internalRow.Key()) | |||
} | |||
func (i *IndexReader) DocCount() (uint64, error) { | |||
return i.docCount, nil | |||
} | |||
func (i *IndexReader) Close() error { | |||
return i.kvreader.Close() | |||
} | |||
func (i *IndexReader) ExternalID(id index.IndexInternalID) (string, error) { | |||
return string(id), nil | |||
} | |||
func (i *IndexReader) InternalID(id string) (index.IndexInternalID, error) { | |||
return index.IndexInternalID(id), nil | |||
} | |||
func incrementBytes(in []byte) []byte { | |||
rv := make([]byte, len(in)) | |||
copy(rv, in) | |||
for i := len(rv) - 1; i >= 0; i-- { | |||
rv[i] = rv[i] + 1 | |||
if rv[i] != 0 { | |||
// didn't overflow, so stop | |||
break | |||
} | |||
} | |||
return rv | |||
} |
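// Editorial note: incrementBytes yields an exclusive upper bound that | |||
// sorts after every key beginning with the input, which is how | |||
// newUpsideDownCouchFieldDict turns an inclusive endTerm into a range end: | |||
// | |||
//	incrementBytes([]byte{0x01, 0xff}) // -> {0x02, 0x00} | |||
//	incrementBytes([]byte{0xff, 0xff}) // -> {0x00, 0x00} (every byte overflows) | |||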
@@ -0,0 +1,325 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package upsidedown | |||
import ( | |||
"bytes" | |||
"sort" | |||
"sync/atomic" | |||
"github.com/blevesearch/bleve/index" | |||
"github.com/blevesearch/bleve/index/store" | |||
) | |||
type UpsideDownCouchTermFieldReader struct { | |||
count uint64 | |||
indexReader *IndexReader | |||
iterator store.KVIterator | |||
term []byte | |||
tfrNext *TermFrequencyRow | |||
keyBuf []byte | |||
field uint16 | |||
} | |||
func newUpsideDownCouchTermFieldReader(indexReader *IndexReader, term []byte, field uint16, includeFreq, includeNorm, includeTermVectors bool) (*UpsideDownCouchTermFieldReader, error) { | |||
dictionaryRow := NewDictionaryRow(term, field, 0) | |||
val, err := indexReader.kvreader.Get(dictionaryRow.Key()) | |||
if err != nil { | |||
return nil, err | |||
} | |||
if val == nil { | |||
atomic.AddUint64(&indexReader.index.stats.termSearchersStarted, uint64(1)) | |||
return &UpsideDownCouchTermFieldReader{ | |||
count: 0, | |||
term: term, | |||
tfrNext: &TermFrequencyRow{}, | |||
field: field, | |||
}, nil | |||
} | |||
err = dictionaryRow.parseDictionaryV(val) | |||
if err != nil { | |||
return nil, err | |||
} | |||
tfr := NewTermFrequencyRow(term, field, []byte{}, 0, 0) | |||
it := indexReader.kvreader.PrefixIterator(tfr.Key()) | |||
atomic.AddUint64(&indexReader.index.stats.termSearchersStarted, uint64(1)) | |||
return &UpsideDownCouchTermFieldReader{ | |||
indexReader: indexReader, | |||
iterator: it, | |||
count: dictionaryRow.count, | |||
term: term, | |||
field: field, | |||
}, nil | |||
} | |||
func (r *UpsideDownCouchTermFieldReader) Count() uint64 { | |||
return r.count | |||
} | |||
func (r *UpsideDownCouchTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) { | |||
if r.iterator != nil { | |||
// We treat tfrNext also like an initialization flag, which | |||
// tells us whether we need to invoke the underlying | |||
// iterator.Next(). The first time, don't call iterator.Next(). | |||
if r.tfrNext != nil { | |||
r.iterator.Next() | |||
} else { | |||
r.tfrNext = &TermFrequencyRow{} | |||
} | |||
key, val, valid := r.iterator.Current() | |||
if valid { | |||
tfr := r.tfrNext | |||
err := tfr.parseKDoc(key, r.term) | |||
if err != nil { | |||
return nil, err | |||
} | |||
err = tfr.parseV(val) | |||
if err != nil { | |||
return nil, err | |||
} | |||
rv := preAlloced | |||
if rv == nil { | |||
rv = &index.TermFieldDoc{} | |||
} | |||
rv.ID = append(rv.ID, tfr.doc...) | |||
rv.Freq = tfr.freq | |||
rv.Norm = float64(tfr.norm) | |||
if tfr.vectors != nil { | |||
rv.Vectors = r.indexReader.index.termFieldVectorsFromTermVectors(tfr.vectors) | |||
} | |||
return rv, nil | |||
} | |||
} | |||
return nil, nil | |||
} | |||
func (r *UpsideDownCouchTermFieldReader) Advance(docID index.IndexInternalID, preAlloced *index.TermFieldDoc) (rv *index.TermFieldDoc, err error) { | |||
if r.iterator != nil { | |||
if r.tfrNext == nil { | |||
r.tfrNext = &TermFrequencyRow{} | |||
} | |||
tfr := InitTermFrequencyRow(r.tfrNext, r.term, r.field, docID, 0, 0) | |||
r.keyBuf, err = tfr.KeyAppendTo(r.keyBuf[:0]) | |||
if err != nil { | |||
return nil, err | |||
} | |||
r.iterator.Seek(r.keyBuf) | |||
key, val, valid := r.iterator.Current() | |||
if valid { | |||
err := tfr.parseKDoc(key, r.term) | |||
if err != nil { | |||
return nil, err | |||
} | |||
err = tfr.parseV(val) | |||
if err != nil { | |||
return nil, err | |||
} | |||
rv = preAlloced | |||
if rv == nil { | |||
rv = &index.TermFieldDoc{} | |||
} | |||
rv.ID = append(rv.ID, tfr.doc...) | |||
rv.Freq = tfr.freq | |||
rv.Norm = float64(tfr.norm) | |||
if tfr.vectors != nil { | |||
rv.Vectors = r.indexReader.index.termFieldVectorsFromTermVectors(tfr.vectors) | |||
} | |||
return rv, nil | |||
} | |||
} | |||
return nil, nil | |||
} | |||
func (r *UpsideDownCouchTermFieldReader) Close() error { | |||
if r.indexReader != nil { | |||
atomic.AddUint64(&r.indexReader.index.stats.termSearchersFinished, uint64(1)) | |||
} | |||
if r.iterator != nil { | |||
return r.iterator.Close() | |||
} | |||
return nil | |||
} | |||
type UpsideDownCouchDocIDReader struct { | |||
indexReader *IndexReader | |||
iterator store.KVIterator | |||
only []string | |||
onlyPos int | |||
onlyMode bool | |||
} | |||
func newUpsideDownCouchDocIDReader(indexReader *IndexReader) (*UpsideDownCouchDocIDReader, error) { | |||
startBytes := []byte{0x0} | |||
endBytes := []byte{0xff} | |||
bisr := NewBackIndexRow(startBytes, nil, nil) | |||
bier := NewBackIndexRow(endBytes, nil, nil) | |||
it := indexReader.kvreader.RangeIterator(bisr.Key(), bier.Key()) | |||
return &UpsideDownCouchDocIDReader{ | |||
indexReader: indexReader, | |||
iterator: it, | |||
}, nil | |||
} | |||
func newUpsideDownCouchDocIDReaderOnly(indexReader *IndexReader, ids []string) (*UpsideDownCouchDocIDReader, error) { | |||
// ensure ids are sorted | |||
sort.Strings(ids) | |||
startBytes := []byte{0x0} | |||
if len(ids) > 0 { | |||
startBytes = []byte(ids[0]) | |||
} | |||
endBytes := []byte{0xff} | |||
if len(ids) > 0 { | |||
endBytes = incrementBytes([]byte(ids[len(ids)-1])) | |||
} | |||
bisr := NewBackIndexRow(startBytes, nil, nil) | |||
bier := NewBackIndexRow(endBytes, nil, nil) | |||
it := indexReader.kvreader.RangeIterator(bisr.Key(), bier.Key()) | |||
return &UpsideDownCouchDocIDReader{ | |||
indexReader: indexReader, | |||
iterator: it, | |||
only: ids, | |||
onlyMode: true, | |||
}, nil | |||
} | |||
func (r *UpsideDownCouchDocIDReader) Next() (index.IndexInternalID, error) { | |||
key, val, valid := r.iterator.Current() | |||
if r.onlyMode { | |||
var rv index.IndexInternalID | |||
for valid && r.onlyPos < len(r.only) { | |||
br, err := NewBackIndexRowKV(key, val) | |||
if err != nil { | |||
return nil, err | |||
} | |||
if !bytes.Equal(br.doc, []byte(r.only[r.onlyPos])) { | |||
ok := r.nextOnly() | |||
if !ok { | |||
return nil, nil | |||
} | |||
r.iterator.Seek(NewBackIndexRow([]byte(r.only[r.onlyPos]), nil, nil).Key()) | |||
key, val, valid = r.iterator.Current() | |||
continue | |||
} else { | |||
rv = append([]byte(nil), br.doc...) | |||
break | |||
} | |||
} | |||
if valid && r.onlyPos < len(r.only) { | |||
ok := r.nextOnly() | |||
if ok { | |||
r.iterator.Seek(NewBackIndexRow([]byte(r.only[r.onlyPos]), nil, nil).Key()) | |||
} | |||
return rv, nil | |||
} | |||
} else { | |||
if valid { | |||
br, err := NewBackIndexRowKV(key, val) | |||
if err != nil { | |||
return nil, err | |||
} | |||
rv := append([]byte(nil), br.doc...) | |||
r.iterator.Next() | |||
return rv, nil | |||
} | |||
} | |||
return nil, nil | |||
} | |||
func (r *UpsideDownCouchDocIDReader) Advance(docID index.IndexInternalID) (index.IndexInternalID, error) { | |||
if r.onlyMode { | |||
r.onlyPos = sort.SearchStrings(r.only, string(docID)) | |||
if r.onlyPos >= len(r.only) { | |||
// advanced to key after our last only key | |||
return nil, nil | |||
} | |||
r.iterator.Seek(NewBackIndexRow([]byte(r.only[r.onlyPos]), nil, nil).Key()) | |||
key, val, valid := r.iterator.Current() | |||
var rv index.IndexInternalID | |||
for valid && r.onlyPos < len(r.only) { | |||
br, err := NewBackIndexRowKV(key, val) | |||
if err != nil { | |||
return nil, err | |||
} | |||
if !bytes.Equal(br.doc, []byte(r.only[r.onlyPos])) { | |||
// the only key we seek'd to didn't exist | |||
// now look for the closest key that did exist in only | |||
r.onlyPos = sort.SearchStrings(r.only, string(br.doc)) | |||
if r.onlyPos >= len(r.only) { | |||
// advanced to key after our last only key | |||
return nil, nil | |||
} | |||
// now seek to this new only key | |||
r.iterator.Seek(NewBackIndexRow([]byte(r.only[r.onlyPos]), nil, nil).Key()) | |||
key, val, valid = r.iterator.Current() | |||
continue | |||
} else { | |||
rv = append([]byte(nil), br.doc...) | |||
break | |||
} | |||
} | |||
if valid && r.onlyPos < len(r.only) { | |||
ok := r.nextOnly() | |||
if ok { | |||
r.iterator.Seek(NewBackIndexRow([]byte(r.only[r.onlyPos]), nil, nil).Key()) | |||
} | |||
return rv, nil | |||
} | |||
} else { | |||
bir := NewBackIndexRow(docID, nil, nil) | |||
r.iterator.Seek(bir.Key()) | |||
key, val, valid := r.iterator.Current() | |||
if valid { | |||
br, err := NewBackIndexRowKV(key, val) | |||
if err != nil { | |||
return nil, err | |||
} | |||
rv := append([]byte(nil), br.doc...) | |||
r.iterator.Next() | |||
return rv, nil | |||
} | |||
} | |||
return nil, nil | |||
} | |||
func (r *UpsideDownCouchDocIDReader) Close() error { | |||
return r.iterator.Close() | |||
} | |||
// move the r.only pos forward one, skipping duplicates | |||
// return true if there is more data, or false if we got to the end of the list | |||
func (r *UpsideDownCouchDocIDReader) nextOnly() bool { | |||
// advance 1 position, until we see a different key | |||
// it's already sorted, so this skips duplicates | |||
start := r.onlyPos | |||
r.onlyPos++ | |||
for r.onlyPos < len(r.only) && r.only[r.onlyPos] == r.only[start] { | |||
start = r.onlyPos | |||
r.onlyPos++ | |||
} | |||
// indicate if we got to the end of the list | |||
return r.onlyPos < len(r.only) | |||
} |
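// Editorial sketch (not upstream code; indexReader is a placeholder): | |||
// DocIDReaderOnly sorts the requested ids, skips duplicates via nextOnly, | |||
// and yields only the ids present in the back index, in sorted order: | |||
// | |||
//	r, _ := indexReader.DocIDReaderOnly([]string{"b", "a", "a", "nope"}) | |||
//	for id, err := r.Next(); err == nil && id != nil; id, err = r.Next() { | |||
//		fmt.Println(string(id)) // "a", then "b" (assuming "nope" is not indexed) | |||
//	} | |||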
@@ -0,0 +1,853 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package upsidedown | |||
import ( | |||
"bytes" | |||
"encoding/binary" | |||
"fmt" | |||
"io" | |||
"math" | |||
"github.com/golang/protobuf/proto" | |||
) | |||
const ByteSeparator byte = 0xff | |||
type UpsideDownCouchRowStream chan UpsideDownCouchRow | |||
type UpsideDownCouchRow interface { | |||
KeySize() int | |||
KeyTo([]byte) (int, error) | |||
Key() []byte | |||
Value() []byte | |||
ValueSize() int | |||
ValueTo([]byte) (int, error) | |||
} | |||
func ParseFromKeyValue(key, value []byte) (UpsideDownCouchRow, error) { | |||
if len(key) > 0 { | |||
switch key[0] { | |||
case 'v': | |||
return NewVersionRowKV(key, value) | |||
case 'f': | |||
return NewFieldRowKV(key, value) | |||
case 'd': | |||
return NewDictionaryRowKV(key, value) | |||
case 't': | |||
return NewTermFrequencyRowKV(key, value) | |||
case 'b': | |||
return NewBackIndexRowKV(key, value) | |||
case 's': | |||
return NewStoredRowKV(key, value) | |||
case 'i': | |||
return NewInternalRowKV(key, value) | |||
} | |||
return nil, fmt.Errorf("Unknown field type '%s'", string(key[0])) | |||
} | |||
return nil, fmt.Errorf("Invalid empty key") | |||
} | |||
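// Editorial note: row keys are self-describing via their first byte, so a | |||
// raw KV dump can be decoded with no extra context: 'v' version, 'f' field, | |||
// 'd' dictionary, 't' term frequency, 'b' back index, 's' stored, | |||
// 'i' internal. | |||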
// VERSION | |||
type VersionRow struct { | |||
version uint8 | |||
} | |||
func (v *VersionRow) Key() []byte { | |||
return []byte{'v'} | |||
} | |||
func (v *VersionRow) KeySize() int { | |||
return 1 | |||
} | |||
func (v *VersionRow) KeyTo(buf []byte) (int, error) { | |||
buf[0] = 'v' | |||
return 1, nil | |||
} | |||
func (v *VersionRow) Value() []byte { | |||
return []byte{byte(v.version)} | |||
} | |||
func (v *VersionRow) ValueSize() int { | |||
return 1 | |||
} | |||
func (v *VersionRow) ValueTo(buf []byte) (int, error) { | |||
buf[0] = v.version | |||
return 1, nil | |||
} | |||
func (v *VersionRow) String() string { | |||
return fmt.Sprintf("Version: %d", v.version) | |||
} | |||
func NewVersionRow(version uint8) *VersionRow { | |||
return &VersionRow{ | |||
version: version, | |||
} | |||
} | |||
func NewVersionRowKV(key, value []byte) (*VersionRow, error) { | |||
rv := VersionRow{} | |||
buf := bytes.NewBuffer(value) | |||
err := binary.Read(buf, binary.LittleEndian, &rv.version) | |||
if err != nil { | |||
return nil, err | |||
} | |||
return &rv, nil | |||
} | |||
// INTERNAL STORAGE | |||
type InternalRow struct { | |||
key []byte | |||
val []byte | |||
} | |||
func (i *InternalRow) Key() []byte { | |||
buf := make([]byte, i.KeySize()) | |||
size, _ := i.KeyTo(buf) | |||
return buf[:size] | |||
} | |||
func (i *InternalRow) KeySize() int { | |||
return len(i.key) + 1 | |||
} | |||
func (i *InternalRow) KeyTo(buf []byte) (int, error) { | |||
buf[0] = 'i' | |||
actual := copy(buf[1:], i.key) | |||
return 1 + actual, nil | |||
} | |||
func (i *InternalRow) Value() []byte { | |||
return i.val | |||
} | |||
func (i *InternalRow) ValueSize() int { | |||
return len(i.val) | |||
} | |||
func (i *InternalRow) ValueTo(buf []byte) (int, error) { | |||
actual := copy(buf, i.val) | |||
return actual, nil | |||
} | |||
func (i *InternalRow) String() string { | |||
return fmt.Sprintf("InternalStore - Key: %s (% x) Val: %s (% x)", i.key, i.key, i.val, i.val) | |||
} | |||
func NewInternalRow(key, val []byte) *InternalRow { | |||
return &InternalRow{ | |||
key: key, | |||
val: val, | |||
} | |||
} | |||
func NewInternalRowKV(key, value []byte) (*InternalRow, error) { | |||
rv := InternalRow{} | |||
rv.key = key[1:] | |||
rv.val = value | |||
return &rv, nil | |||
} | |||
// FIELD definition | |||
type FieldRow struct { | |||
index uint16 | |||
name string | |||
} | |||
func (f *FieldRow) Key() []byte { | |||
buf := make([]byte, f.KeySize()) | |||
size, _ := f.KeyTo(buf) | |||
return buf[:size] | |||
} | |||
func (f *FieldRow) KeySize() int { | |||
return 3 | |||
} | |||
func (f *FieldRow) KeyTo(buf []byte) (int, error) { | |||
buf[0] = 'f' | |||
binary.LittleEndian.PutUint16(buf[1:3], f.index) | |||
return 3, nil | |||
} | |||
func (f *FieldRow) Value() []byte { | |||
return append([]byte(f.name), ByteSeparator) | |||
} | |||
func (f *FieldRow) ValueSize() int { | |||
return len(f.name) + 1 | |||
} | |||
func (f *FieldRow) ValueTo(buf []byte) (int, error) { | |||
size := copy(buf, f.name) | |||
buf[size] = ByteSeparator | |||
return size + 1, nil | |||
} | |||
func (f *FieldRow) String() string { | |||
return fmt.Sprintf("Field: %d Name: %s", f.index, f.name) | |||
} | |||
func NewFieldRow(index uint16, name string) *FieldRow { | |||
return &FieldRow{ | |||
index: index, | |||
name: name, | |||
} | |||
} | |||
func NewFieldRowKV(key, value []byte) (*FieldRow, error) { | |||
rv := FieldRow{} | |||
buf := bytes.NewBuffer(key) | |||
_, err := buf.ReadByte() // type | |||
if err != nil { | |||
return nil, err | |||
} | |||
err = binary.Read(buf, binary.LittleEndian, &rv.index) | |||
if err != nil { | |||
return nil, err | |||
} | |||
buf = bytes.NewBuffer(value) | |||
rv.name, err = buf.ReadString(ByteSeparator) | |||
if err != nil { | |||
return nil, err | |||
} | |||
rv.name = rv.name[:len(rv.name)-1] // trim off separator byte | |||
return &rv, nil | |||
} | |||
// DICTIONARY | |||
const DictionaryRowMaxValueSize = binary.MaxVarintLen64 | |||
type DictionaryRow struct { | |||
term []byte | |||
count uint64 | |||
field uint16 | |||
} | |||
func (dr *DictionaryRow) Key() []byte { | |||
buf := make([]byte, dr.KeySize()) | |||
size, _ := dr.KeyTo(buf) | |||
return buf[:size] | |||
} | |||
func (dr *DictionaryRow) KeySize() int { | |||
return len(dr.term) + 3 | |||
} | |||
func (dr *DictionaryRow) KeyTo(buf []byte) (int, error) { | |||
buf[0] = 'd' | |||
binary.LittleEndian.PutUint16(buf[1:3], dr.field) | |||
size := copy(buf[3:], dr.term) | |||
return size + 3, nil | |||
} | |||
func (dr *DictionaryRow) Value() []byte { | |||
buf := make([]byte, dr.ValueSize()) | |||
size, _ := dr.ValueTo(buf) | |||
return buf[:size] | |||
} | |||
func (dr *DictionaryRow) ValueSize() int { | |||
return DictionaryRowMaxValueSize | |||
} | |||
func (dr *DictionaryRow) ValueTo(buf []byte) (int, error) { | |||
used := binary.PutUvarint(buf, dr.count) | |||
return used, nil | |||
} | |||
func (dr *DictionaryRow) String() string { | |||
return fmt.Sprintf("Dictionary Term: `%s` Field: %d Count: %d ", string(dr.term), dr.field, dr.count) | |||
} | |||
func NewDictionaryRow(term []byte, field uint16, count uint64) *DictionaryRow { | |||
return &DictionaryRow{ | |||
term: term, | |||
field: field, | |||
count: count, | |||
} | |||
} | |||
func NewDictionaryRowKV(key, value []byte) (*DictionaryRow, error) { | |||
rv, err := NewDictionaryRowK(key) | |||
if err != nil { | |||
return nil, err | |||
} | |||
err = rv.parseDictionaryV(value) | |||
if err != nil { | |||
return nil, err | |||
} | |||
return rv, nil | |||
} | |||
func NewDictionaryRowK(key []byte) (*DictionaryRow, error) { | |||
rv := &DictionaryRow{} | |||
err := rv.parseDictionaryK(key) | |||
if err != nil { | |||
return nil, err | |||
} | |||
return rv, nil | |||
} | |||
func (dr *DictionaryRow) parseDictionaryK(key []byte) error { | |||
dr.field = binary.LittleEndian.Uint16(key[1:3]) | |||
if dr.term != nil { | |||
dr.term = dr.term[:0] | |||
} | |||
dr.term = append(dr.term, key[3:]...) | |||
return nil | |||
} | |||
func (dr *DictionaryRow) parseDictionaryV(value []byte) error { | |||
count, nread := binary.Uvarint(value) | |||
if nread <= 0 { | |||
return fmt.Errorf("DictionaryRow parse Uvarint error, nread: %d", nread) | |||
} | |||
dr.count = count | |||
return nil | |||
} | |||
// TERM FIELD FREQUENCY | |||
type TermVector struct { | |||
field uint16 | |||
arrayPositions []uint64 | |||
pos uint64 | |||
start uint64 | |||
end uint64 | |||
} | |||
func (tv *TermVector) String() string { | |||
return fmt.Sprintf("Field: %d Pos: %d Start: %d End %d ArrayPositions: %#v", tv.field, tv.pos, tv.start, tv.end, tv.arrayPositions) | |||
} | |||
type TermFrequencyRow struct { | |||
term []byte | |||
doc []byte | |||
freq uint64 | |||
vectors []*TermVector | |||
norm float32 | |||
field uint16 | |||
} | |||
func (tfr *TermFrequencyRow) Term() []byte { | |||
return tfr.term | |||
} | |||
func (tfr *TermFrequencyRow) Freq() uint64 { | |||
return tfr.freq | |||
} | |||
func (tfr *TermFrequencyRow) ScanPrefixForField() []byte { | |||
buf := make([]byte, 3) | |||
buf[0] = 't' | |||
binary.LittleEndian.PutUint16(buf[1:3], tfr.field) | |||
return buf | |||
} | |||
func (tfr *TermFrequencyRow) ScanPrefixForFieldTermPrefix() []byte { | |||
buf := make([]byte, 3+len(tfr.term)) | |||
buf[0] = 't' | |||
binary.LittleEndian.PutUint16(buf[1:3], tfr.field) | |||
copy(buf[3:], tfr.term) | |||
return buf | |||
} | |||
func (tfr *TermFrequencyRow) ScanPrefixForFieldTerm() []byte { | |||
buf := make([]byte, 3+len(tfr.term)+1) | |||
buf[0] = 't' | |||
binary.LittleEndian.PutUint16(buf[1:3], tfr.field) | |||
termLen := copy(buf[3:], tfr.term) | |||
buf[3+termLen] = ByteSeparator | |||
return buf | |||
} | |||
func (tfr *TermFrequencyRow) Key() []byte { | |||
buf := make([]byte, tfr.KeySize()) | |||
size, _ := tfr.KeyTo(buf) | |||
return buf[:size] | |||
} | |||
func (tfr *TermFrequencyRow) KeySize() int { | |||
return 3 + len(tfr.term) + 1 + len(tfr.doc) | |||
} | |||
func (tfr *TermFrequencyRow) KeyTo(buf []byte) (int, error) { | |||
buf[0] = 't' | |||
binary.LittleEndian.PutUint16(buf[1:3], tfr.field) | |||
termLen := copy(buf[3:], tfr.term) | |||
buf[3+termLen] = ByteSeparator | |||
docLen := copy(buf[3+termLen+1:], tfr.doc) | |||
return 3 + termLen + 1 + docLen, nil | |||
} | |||
func (tfr *TermFrequencyRow) KeyAppendTo(buf []byte) ([]byte, error) { | |||
keySize := tfr.KeySize() | |||
if cap(buf) < keySize { | |||
buf = make([]byte, keySize) | |||
} | |||
actualSize, err := tfr.KeyTo(buf[0:keySize]) | |||
return buf[0:actualSize], err | |||
} | |||
func (tfr *TermFrequencyRow) DictionaryRowKey() []byte { | |||
dr := NewDictionaryRow(tfr.term, tfr.field, 0) | |||
return dr.Key() | |||
} | |||
func (tfr *TermFrequencyRow) DictionaryRowKeySize() int { | |||
dr := NewDictionaryRow(tfr.term, tfr.field, 0) | |||
return dr.KeySize() | |||
} | |||
func (tfr *TermFrequencyRow) DictionaryRowKeyTo(buf []byte) (int, error) { | |||
dr := NewDictionaryRow(tfr.term, tfr.field, 0) | |||
return dr.KeyTo(buf) | |||
} | |||
func (tfr *TermFrequencyRow) Value() []byte { | |||
buf := make([]byte, tfr.ValueSize()) | |||
size, _ := tfr.ValueTo(buf) | |||
return buf[:size] | |||
} | |||
func (tfr *TermFrequencyRow) ValueSize() int { | |||
bufLen := binary.MaxVarintLen64 + binary.MaxVarintLen64 | |||
for _, vector := range tfr.vectors { | |||
bufLen += (binary.MaxVarintLen64 * 4) + (1+len(vector.arrayPositions))*binary.MaxVarintLen64 | |||
} | |||
return bufLen | |||
} | |||
func (tfr *TermFrequencyRow) ValueTo(buf []byte) (int, error) { | |||
used := binary.PutUvarint(buf[:binary.MaxVarintLen64], tfr.freq) | |||
normuint32 := math.Float32bits(tfr.norm) | |||
newbuf := buf[used : used+binary.MaxVarintLen64] | |||
used += binary.PutUvarint(newbuf, uint64(normuint32)) | |||
for _, vector := range tfr.vectors { | |||
used += binary.PutUvarint(buf[used:used+binary.MaxVarintLen64], uint64(vector.field)) | |||
used += binary.PutUvarint(buf[used:used+binary.MaxVarintLen64], vector.pos) | |||
used += binary.PutUvarint(buf[used:used+binary.MaxVarintLen64], vector.start) | |||
used += binary.PutUvarint(buf[used:used+binary.MaxVarintLen64], vector.end) | |||
used += binary.PutUvarint(buf[used:used+binary.MaxVarintLen64], uint64(len(vector.arrayPositions))) | |||
for _, arrayPosition := range vector.arrayPositions { | |||
used += binary.PutUvarint(buf[used:used+binary.MaxVarintLen64], arrayPosition) | |||
} | |||
} | |||
return used, nil | |||
} | |||
func (tfr *TermFrequencyRow) String() string { | |||
return fmt.Sprintf("Term: `%s` Field: %d DocId: `%s` Frequency: %d Norm: %f Vectors: %v", string(tfr.term), tfr.field, string(tfr.doc), tfr.freq, tfr.norm, tfr.vectors) | |||
} | |||
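// Editorial note: the key layout produced by KeyTo and consumed by | |||
// parseK/parseKDoc is | |||
// | |||
//	't' | field (uint16 LE) | term | 0xff (ByteSeparator) | doc id | |||
// | |||
// e.g. field=1, term="go", doc="d1" encodes as 74 01 00 67 6f ff 64 31. | |||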
func InitTermFrequencyRow(tfr *TermFrequencyRow, term []byte, field uint16, docID []byte, freq uint64, norm float32) *TermFrequencyRow { | |||
tfr.term = term | |||
tfr.field = field | |||
tfr.doc = docID | |||
tfr.freq = freq | |||
tfr.norm = norm | |||
return tfr | |||
} | |||
func NewTermFrequencyRow(term []byte, field uint16, docID []byte, freq uint64, norm float32) *TermFrequencyRow { | |||
return &TermFrequencyRow{ | |||
term: term, | |||
field: field, | |||
doc: docID, | |||
freq: freq, | |||
norm: norm, | |||
} | |||
} | |||
func NewTermFrequencyRowWithTermVectors(term []byte, field uint16, docID []byte, freq uint64, norm float32, vectors []*TermVector) *TermFrequencyRow { | |||
return &TermFrequencyRow{ | |||
term: term, | |||
field: field, | |||
doc: docID, | |||
freq: freq, | |||
norm: norm, | |||
vectors: vectors, | |||
} | |||
} | |||
func NewTermFrequencyRowK(key []byte) (*TermFrequencyRow, error) { | |||
rv := &TermFrequencyRow{} | |||
err := rv.parseK(key) | |||
if err != nil { | |||
return nil, err | |||
} | |||
return rv, nil | |||
} | |||
func (tfr *TermFrequencyRow) parseK(key []byte) error { | |||
keyLen := len(key) | |||
if keyLen < 3 { | |||
return fmt.Errorf("invalid term frequency key, no valid field") | |||
} | |||
tfr.field = binary.LittleEndian.Uint16(key[1:3]) | |||
termEndPos := bytes.IndexByte(key[3:], ByteSeparator) | |||
if termEndPos < 0 { | |||
return fmt.Errorf("invalid term frequency key, no byte separator terminating term") | |||
} | |||
tfr.term = key[3 : 3+termEndPos] | |||
docLen := keyLen - (3 + termEndPos + 1) | |||
if docLen < 1 { | |||
return fmt.Errorf("invalid term frequency key, empty docid") | |||
} | |||
tfr.doc = key[3+termEndPos+1:] | |||
return nil | |||
} | |||
func (tfr *TermFrequencyRow) parseKDoc(key []byte, term []byte) error { | |||
tfr.doc = key[3+len(term)+1:] | |||
if len(tfr.doc) <= 0 { | |||
return fmt.Errorf("invalid term frequency key, empty docid") | |||
} | |||
return nil | |||
} | |||
func (tfr *TermFrequencyRow) parseV(value []byte) error { | |||
var bytesRead int | |||
tfr.freq, bytesRead = binary.Uvarint(value) | |||
if bytesRead <= 0 { | |||
return fmt.Errorf("invalid term frequency value, invalid frequency") | |||
} | |||
currOffset := bytesRead | |||
var norm uint64 | |||
norm, bytesRead = binary.Uvarint(value[currOffset:]) | |||
if bytesRead <= 0 { | |||
return fmt.Errorf("invalid term frequency value, no norm") | |||
} | |||
currOffset += bytesRead | |||
tfr.norm = math.Float32frombits(uint32(norm)) | |||
tfr.vectors = nil | |||
var field uint64 | |||
field, bytesRead = binary.Uvarint(value[currOffset:]) | |||
for bytesRead > 0 { | |||
currOffset += bytesRead | |||
tv := TermVector{} | |||
tv.field = uint16(field) | |||
// at this point we expect at least one term vector | |||
if tfr.vectors == nil { | |||
tfr.vectors = make([]*TermVector, 0) | |||
} | |||
tv.pos, bytesRead = binary.Uvarint(value[currOffset:]) | |||
if bytesRead <= 0 { | |||
return fmt.Errorf("invalid term frequency value, vector contains no position") | |||
} | |||
currOffset += bytesRead | |||
tv.start, bytesRead = binary.Uvarint(value[currOffset:]) | |||
if bytesRead <= 0 { | |||
return fmt.Errorf("invalid term frequency value, vector contains no start") | |||
} | |||
currOffset += bytesRead | |||
tv.end, bytesRead = binary.Uvarint(value[currOffset:]) | |||
if bytesRead <= 0 { | |||
return fmt.Errorf("invalid term frequency value, vector contains no end") | |||
} | |||
currOffset += bytesRead | |||
var arrayPositionsLen uint64 = 0 | |||
arrayPositionsLen, bytesRead = binary.Uvarint(value[currOffset:]) | |||
if bytesRead <= 0 { | |||
return fmt.Errorf("invalid term frequency value, vector contains no arrayPositionLen") | |||
} | |||
currOffset += bytesRead | |||
if arrayPositionsLen > 0 { | |||
tv.arrayPositions = make([]uint64, arrayPositionsLen) | |||
for i := 0; uint64(i) < arrayPositionsLen; i++ { | |||
tv.arrayPositions[i], bytesRead = binary.Uvarint(value[currOffset:]) | |||
if bytesRead <= 0 { | |||
return fmt.Errorf("invalid term frequency value, vector contains no arrayPosition of index %d", i) | |||
} | |||
currOffset += bytesRead | |||
} | |||
} | |||
tfr.vectors = append(tfr.vectors, &tv) | |||
// try to read next record (may not exist) | |||
field, bytesRead = binary.Uvarint(value[currOffset:]) | |||
} | |||
if len(value[currOffset:]) > 0 && bytesRead <= 0 { | |||
return fmt.Errorf("invalid term frequency value, vector field invalid") | |||
} | |||
return nil | |||
} | |||
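// Editorial note: the value parsed above is a run of uvarints: freq, the | |||
// float32 bits of norm, then zero or more vectors of (field, pos, start, | |||
// end, len(arrayPositions), positions...). For freq=1, norm=1.0 and no | |||
// vectors the encoding is 01 80 80 80 fc 03, i.e. uvarint(1) followed by | |||
// uvarint(math.Float32bits(1.0)). | |||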
func NewTermFrequencyRowKV(key, value []byte) (*TermFrequencyRow, error) { | |||
rv, err := NewTermFrequencyRowK(key) | |||
if err != nil { | |||
return nil, err | |||
} | |||
err = rv.parseV(value) | |||
if err != nil { | |||
return nil, err | |||
} | |||
return rv, nil | |||
} | |||
type BackIndexRow struct { | |||
doc []byte | |||
termEntries []*BackIndexTermEntry | |||
storedEntries []*BackIndexStoreEntry | |||
} | |||
func (br *BackIndexRow) AllTermKeys() [][]byte { | |||
if br == nil { | |||
return nil | |||
} | |||
rv := make([][]byte, len(br.termEntries)) | |||
for i, termEntry := range br.termEntries { | |||
termRow := NewTermFrequencyRow([]byte(termEntry.GetTerm()), uint16(termEntry.GetField()), br.doc, 0, 0) | |||
rv[i] = termRow.Key() | |||
} | |||
return rv | |||
} | |||
func (br *BackIndexRow) AllStoredKeys() [][]byte { | |||
if br == nil { | |||
return nil | |||
} | |||
rv := make([][]byte, len(br.storedEntries)) | |||
for i, storedEntry := range br.storedEntries { | |||
storedRow := NewStoredRow(br.doc, uint16(storedEntry.GetField()), storedEntry.GetArrayPositions(), 'x', []byte{}) | |||
rv[i] = storedRow.Key() | |||
} | |||
return rv | |||
} | |||
func (br *BackIndexRow) Key() []byte { | |||
buf := make([]byte, br.KeySize()) | |||
size, _ := br.KeyTo(buf) | |||
return buf[:size] | |||
} | |||
func (br *BackIndexRow) KeySize() int { | |||
return len(br.doc) + 1 | |||
} | |||
func (br *BackIndexRow) KeyTo(buf []byte) (int, error) { | |||
buf[0] = 'b' | |||
used := copy(buf[1:], br.doc) | |||
return used + 1, nil | |||
} | |||
func (br *BackIndexRow) Value() []byte { | |||
buf := make([]byte, br.ValueSize()) | |||
size, _ := br.ValueTo(buf) | |||
return buf[:size] | |||
} | |||
func (br *BackIndexRow) ValueSize() int { | |||
birv := &BackIndexRowValue{ | |||
TermEntries: br.termEntries, | |||
StoredEntries: br.storedEntries, | |||
} | |||
return birv.Size() | |||
} | |||
func (br *BackIndexRow) ValueTo(buf []byte) (int, error) { | |||
birv := &BackIndexRowValue{ | |||
TermEntries: br.termEntries, | |||
StoredEntries: br.storedEntries, | |||
} | |||
return birv.MarshalTo(buf) | |||
} | |||
func (br *BackIndexRow) String() string { | |||
return fmt.Sprintf("Backindex DocId: `%s` Term Entries: %v, Stored Entries: %v", string(br.doc), br.termEntries, br.storedEntries) | |||
} | |||
func NewBackIndexRow(docID []byte, entries []*BackIndexTermEntry, storedFields []*BackIndexStoreEntry) *BackIndexRow { | |||
return &BackIndexRow{ | |||
doc: docID, | |||
termEntries: entries, | |||
storedEntries: storedFields, | |||
} | |||
} | |||
func NewBackIndexRowKV(key, value []byte) (*BackIndexRow, error) { | |||
rv := BackIndexRow{} | |||
buf := bytes.NewBuffer(key) | |||
_, err := buf.ReadByte() // type | |||
if err != nil { | |||
return nil, err | |||
} | |||
rv.doc, err = buf.ReadBytes(ByteSeparator) | |||
if err == io.EOF && len(rv.doc) < 1 { | |||
err = fmt.Errorf("invalid doc length 0 - % x", key) | |||
} | |||
if err != nil && err != io.EOF { | |||
return nil, err | |||
} else if err == nil { | |||
rv.doc = rv.doc[:len(rv.doc)-1] // trim off separator byte | |||
} | |||
var birv BackIndexRowValue | |||
err = proto.Unmarshal(value, &birv) | |||
if err != nil { | |||
return nil, err | |||
} | |||
rv.termEntries = birv.TermEntries | |||
rv.storedEntries = birv.StoredEntries | |||
return &rv, nil | |||
} | |||
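// A minimal round-trip sketch (hypothetical usage, not part of the
// original file): the back index key is 'b' + docID, and the value is
// the protobuf-encoded BackIndexRowValue unmarshalled above.
func exampleBackIndexRoundTrip() error {
	term := "hello"
	field := uint32(0)
	row := NewBackIndexRow([]byte("doc1"),
		[]*BackIndexTermEntry{{Term: &term, Field: &field}}, nil)
	parsed, err := NewBackIndexRowKV(row.Key(), row.Value())
	if err != nil {
		return err
	}
	// parsed.AllTermKeys() now yields the term row keys referencing doc1
	_ = parsed
	return nil
}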
// STORED | |||
type StoredRow struct { | |||
doc []byte | |||
field uint16 | |||
arrayPositions []uint64 | |||
typ byte | |||
value []byte | |||
} | |||
func (s *StoredRow) Key() []byte { | |||
buf := make([]byte, s.KeySize()) | |||
size, _ := s.KeyTo(buf) | |||
return buf[0:size] | |||
} | |||
func (s *StoredRow) KeySize() int { | |||
return 1 + len(s.doc) + 1 + 2 + (binary.MaxVarintLen64 * len(s.arrayPositions)) | |||
} | |||
func (s *StoredRow) KeyTo(buf []byte) (int, error) { | |||
docLen := len(s.doc) | |||
buf[0] = 's' | |||
copy(buf[1:], s.doc) | |||
buf[1+docLen] = ByteSeparator | |||
binary.LittleEndian.PutUint16(buf[1+docLen+1:], s.field) | |||
bytesUsed := 1 + docLen + 1 + 2 | |||
for _, arrayPosition := range s.arrayPositions { | |||
varbytes := binary.PutUvarint(buf[bytesUsed:], arrayPosition) | |||
bytesUsed += varbytes | |||
} | |||
return bytesUsed, nil | |||
} | |||
func (s *StoredRow) Value() []byte { | |||
buf := make([]byte, s.ValueSize()) | |||
size, _ := s.ValueTo(buf) | |||
return buf[:size] | |||
} | |||
func (s *StoredRow) ValueSize() int { | |||
return len(s.value) + 1 | |||
} | |||
func (s *StoredRow) ValueTo(buf []byte) (int, error) { | |||
buf[0] = s.typ | |||
used := copy(buf[1:], s.value) | |||
return used + 1, nil | |||
} | |||
func (s *StoredRow) String() string { | |||
return fmt.Sprintf("Document: %s Field %d, Array Positions: %v, Type: %s Value: %s", s.doc, s.field, s.arrayPositions, string(s.typ), s.value) | |||
} | |||
func (s *StoredRow) ScanPrefixForDoc() []byte { | |||
docLen := len(s.doc) | |||
buf := make([]byte, 1+docLen+1) | |||
buf[0] = 's' | |||
copy(buf[1:], s.doc) | |||
buf[1+docLen] = ByteSeparator | |||
return buf | |||
} | |||
func NewStoredRow(docID []byte, field uint16, arrayPositions []uint64, typ byte, value []byte) *StoredRow { | |||
return &StoredRow{ | |||
doc: docID, | |||
field: field, | |||
arrayPositions: arrayPositions, | |||
typ: typ, | |||
value: value, | |||
} | |||
} | |||
func NewStoredRowK(key []byte) (*StoredRow, error) { | |||
rv := StoredRow{} | |||
buf := bytes.NewBuffer(key) | |||
_, err := buf.ReadByte() // type | |||
if err != nil { | |||
return nil, err | |||
} | |||
rv.doc, err = buf.ReadBytes(ByteSeparator) | |||
if len(rv.doc) < 2 { // 1 for min doc id length, 1 for separator | |||
err = fmt.Errorf("invalid doc length 0") | |||
return nil, err | |||
} | |||
rv.doc = rv.doc[:len(rv.doc)-1] // trim off separator byte | |||
err = binary.Read(buf, binary.LittleEndian, &rv.field) | |||
if err != nil { | |||
return nil, err | |||
} | |||
rv.arrayPositions = make([]uint64, 0) | |||
nextArrayPos, err := binary.ReadUvarint(buf) | |||
for err == nil { | |||
rv.arrayPositions = append(rv.arrayPositions, nextArrayPos) | |||
nextArrayPos, err = binary.ReadUvarint(buf) | |||
} | |||
return &rv, nil | |||
} | |||
func NewStoredRowKV(key, value []byte) (*StoredRow, error) { | |||
rv, err := NewStoredRowK(key) | |||
if err != nil { | |||
return nil, err | |||
} | |||
rv.typ = value[0] | |||
rv.value = value[1:] | |||
return rv, nil | |||
} |
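// A minimal sketch (hypothetical usage; assumes ByteSeparator is the
// 0xff sentinel defined elsewhere in this package): a stored row key is
// 's' + docID + ByteSeparator + little-endian field + uvarint array
// positions, and its value is the type byte followed by the raw bytes.
func exampleStoredRow() {
	row := NewStoredRow([]byte("doc1"), 0, nil, 't', []byte("hello"))
	key := row.Key()   // 's' 'd' 'o' 'c' '1' 0xff 0x00 0x00
	val := row.Value() // 't' 'h' 'e' 'l' 'l' 'o'
	_, _ = key, val
}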
@@ -0,0 +1,76 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package upsidedown | |||
import ( | |||
"encoding/binary" | |||
) | |||
var mergeOperator upsideDownMerge | |||
var dictionaryTermIncr []byte | |||
var dictionaryTermDecr []byte | |||
func init() { | |||
dictionaryTermIncr = make([]byte, 8) | |||
binary.LittleEndian.PutUint64(dictionaryTermIncr, uint64(1)) | |||
dictionaryTermDecr = make([]byte, 8) | |||
var negOne = int64(-1) | |||
binary.LittleEndian.PutUint64(dictionaryTermDecr, uint64(negOne)) | |||
} | |||
type upsideDownMerge struct{} | |||
func (m *upsideDownMerge) FullMerge(key, existingValue []byte, operands [][]byte) ([]byte, bool) { | |||
// set up record based on key | |||
dr, err := NewDictionaryRowK(key) | |||
if err != nil { | |||
return nil, false | |||
} | |||
if len(existingValue) > 0 { | |||
// if existing value, parse it | |||
err = dr.parseDictionaryV(existingValue) | |||
if err != nil { | |||
return nil, false | |||
} | |||
} | |||
// now process operands | |||
for _, operand := range operands { | |||
next := int64(binary.LittleEndian.Uint64(operand)) | |||
if next < 0 && uint64(-next) > dr.count { | |||
			// subtracting next from the existing count would underflow below zero, so clamp at zero | |||
dr.count = 0 | |||
} else if next < 0 { | |||
dr.count -= uint64(-next) | |||
} else { | |||
dr.count += uint64(next) | |||
} | |||
} | |||
return dr.Value(), true | |||
} | |||
func (m *upsideDownMerge) PartialMerge(key, leftOperand, rightOperand []byte) ([]byte, bool) { | |||
left := int64(binary.LittleEndian.Uint64(leftOperand)) | |||
right := int64(binary.LittleEndian.Uint64(rightOperand)) | |||
rv := make([]byte, 8) | |||
binary.LittleEndian.PutUint64(rv, uint64(left+right)) | |||
return rv, true | |||
} | |||
func (m *upsideDownMerge) Name() string { | |||
return "upsideDownMerge" | |||
} |
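// A minimal sketch (illustrative, not part of the original file) of the
// merge arithmetic: PartialMerge sums two signed deltas, so an increment
// and a decrement collapse to a zero-delta operand before FullMerge
// clamps the dictionary count at zero.
func exampleDictionaryMerge() int64 {
	m := &upsideDownMerge{}
	combined, _ := m.PartialMerge(nil, dictionaryTermIncr, dictionaryTermDecr)
	return int64(binary.LittleEndian.Uint64(combined)) // 0
}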
@@ -0,0 +1,55 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package upsidedown | |||
import ( | |||
"encoding/json" | |||
"sync/atomic" | |||
"github.com/blevesearch/bleve/index/store" | |||
) | |||
type indexStat struct { | |||
updates, deletes, batches, errors uint64 | |||
analysisTime, indexTime uint64 | |||
termSearchersStarted uint64 | |||
termSearchersFinished uint64 | |||
numPlainTextBytesIndexed uint64 | |||
i *UpsideDownCouch | |||
} | |||
func (i *indexStat) statsMap() map[string]interface{} { | |||
m := map[string]interface{}{} | |||
m["updates"] = atomic.LoadUint64(&i.updates) | |||
m["deletes"] = atomic.LoadUint64(&i.deletes) | |||
m["batches"] = atomic.LoadUint64(&i.batches) | |||
m["errors"] = atomic.LoadUint64(&i.errors) | |||
m["analysis_time"] = atomic.LoadUint64(&i.analysisTime) | |||
m["index_time"] = atomic.LoadUint64(&i.indexTime) | |||
m["term_searchers_started"] = atomic.LoadUint64(&i.termSearchersStarted) | |||
m["term_searchers_finished"] = atomic.LoadUint64(&i.termSearchersFinished) | |||
m["num_plain_text_bytes_indexed"] = atomic.LoadUint64(&i.numPlainTextBytesIndexed) | |||
if o, ok := i.i.store.(store.KVStoreStats); ok { | |||
m["kv"] = o.StatsMap() | |||
} | |||
return m | |||
} | |||
func (i *indexStat) MarshalJSON() ([]byte, error) { | |||
m := i.statsMap() | |||
return json.Marshal(m) | |||
} |
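// A minimal sketch (illustrative, not part of the original file) of the
// update pattern these counters expect from indexing code paths before
// they surface in statsMap:
func recordBatch(s *indexStat, indexNanos uint64) {
	atomic.AddUint64(&s.batches, 1)
	atomic.AddUint64(&s.indexTime, indexNanos)
}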
@@ -0,0 +1,684 @@ | |||
// Code generated by protoc-gen-gogo. | |||
// source: upsidedown.proto | |||
// DO NOT EDIT! | |||
/* | |||
Package upsidedown is a generated protocol buffer package. | |||
It is generated from these files: | |||
upsidedown.proto | |||
It has these top-level messages: | |||
BackIndexTermEntry | |||
BackIndexStoreEntry | |||
BackIndexRowValue | |||
*/ | |||
package upsidedown | |||
import proto "github.com/golang/protobuf/proto" | |||
import math "math" | |||
import io "io" | |||
import fmt "fmt" | |||
import github_com_golang_protobuf_proto "github.com/golang/protobuf/proto" | |||
// Reference imports to suppress errors if they are not otherwise used. | |||
var _ = proto.Marshal | |||
var _ = math.Inf | |||
type BackIndexTermEntry struct { | |||
Term *string `protobuf:"bytes,1,req,name=term" json:"term,omitempty"` | |||
Field *uint32 `protobuf:"varint,2,req,name=field" json:"field,omitempty"` | |||
XXX_unrecognized []byte `json:"-"` | |||
} | |||
func (m *BackIndexTermEntry) Reset() { *m = BackIndexTermEntry{} } | |||
func (m *BackIndexTermEntry) String() string { return proto.CompactTextString(m) } | |||
func (*BackIndexTermEntry) ProtoMessage() {} | |||
func (m *BackIndexTermEntry) GetTerm() string { | |||
if m != nil && m.Term != nil { | |||
return *m.Term | |||
} | |||
return "" | |||
} | |||
func (m *BackIndexTermEntry) GetField() uint32 { | |||
if m != nil && m.Field != nil { | |||
return *m.Field | |||
} | |||
return 0 | |||
} | |||
type BackIndexStoreEntry struct { | |||
Field *uint32 `protobuf:"varint,1,req,name=field" json:"field,omitempty"` | |||
ArrayPositions []uint64 `protobuf:"varint,2,rep,name=arrayPositions" json:"arrayPositions,omitempty"` | |||
XXX_unrecognized []byte `json:"-"` | |||
} | |||
func (m *BackIndexStoreEntry) Reset() { *m = BackIndexStoreEntry{} } | |||
func (m *BackIndexStoreEntry) String() string { return proto.CompactTextString(m) } | |||
func (*BackIndexStoreEntry) ProtoMessage() {} | |||
func (m *BackIndexStoreEntry) GetField() uint32 { | |||
if m != nil && m.Field != nil { | |||
return *m.Field | |||
} | |||
return 0 | |||
} | |||
func (m *BackIndexStoreEntry) GetArrayPositions() []uint64 { | |||
if m != nil { | |||
return m.ArrayPositions | |||
} | |||
return nil | |||
} | |||
type BackIndexRowValue struct { | |||
TermEntries []*BackIndexTermEntry `protobuf:"bytes,1,rep,name=termEntries" json:"termEntries,omitempty"` | |||
StoredEntries []*BackIndexStoreEntry `protobuf:"bytes,2,rep,name=storedEntries" json:"storedEntries,omitempty"` | |||
XXX_unrecognized []byte `json:"-"` | |||
} | |||
func (m *BackIndexRowValue) Reset() { *m = BackIndexRowValue{} } | |||
func (m *BackIndexRowValue) String() string { return proto.CompactTextString(m) } | |||
func (*BackIndexRowValue) ProtoMessage() {} | |||
func (m *BackIndexRowValue) GetTermEntries() []*BackIndexTermEntry { | |||
if m != nil { | |||
return m.TermEntries | |||
} | |||
return nil | |||
} | |||
func (m *BackIndexRowValue) GetStoredEntries() []*BackIndexStoreEntry { | |||
if m != nil { | |||
return m.StoredEntries | |||
} | |||
return nil | |||
} | |||
func (m *BackIndexTermEntry) Unmarshal(data []byte) error { | |||
var hasFields [1]uint64 | |||
l := len(data) | |||
iNdEx := 0 | |||
for iNdEx < l { | |||
var wire uint64 | |||
for shift := uint(0); ; shift += 7 { | |||
if iNdEx >= l { | |||
return io.ErrUnexpectedEOF | |||
} | |||
b := data[iNdEx] | |||
iNdEx++ | |||
wire |= (uint64(b) & 0x7F) << shift | |||
if b < 0x80 { | |||
break | |||
} | |||
} | |||
fieldNum := int32(wire >> 3) | |||
wireType := int(wire & 0x7) | |||
switch fieldNum { | |||
case 1: | |||
if wireType != 2 { | |||
return fmt.Errorf("proto: wrong wireType = %d for field Term", wireType) | |||
} | |||
var stringLen uint64 | |||
for shift := uint(0); ; shift += 7 { | |||
if iNdEx >= l { | |||
return io.ErrUnexpectedEOF | |||
} | |||
b := data[iNdEx] | |||
iNdEx++ | |||
stringLen |= (uint64(b) & 0x7F) << shift | |||
if b < 0x80 { | |||
break | |||
} | |||
} | |||
postIndex := iNdEx + int(stringLen) | |||
if postIndex > l { | |||
return io.ErrUnexpectedEOF | |||
} | |||
s := string(data[iNdEx:postIndex]) | |||
m.Term = &s | |||
iNdEx = postIndex | |||
hasFields[0] |= uint64(0x00000001) | |||
case 2: | |||
if wireType != 0 { | |||
return fmt.Errorf("proto: wrong wireType = %d for field Field", wireType) | |||
} | |||
var v uint32 | |||
for shift := uint(0); ; shift += 7 { | |||
if iNdEx >= l { | |||
return io.ErrUnexpectedEOF | |||
} | |||
b := data[iNdEx] | |||
iNdEx++ | |||
v |= (uint32(b) & 0x7F) << shift | |||
if b < 0x80 { | |||
break | |||
} | |||
} | |||
m.Field = &v | |||
hasFields[0] |= uint64(0x00000002) | |||
default: | |||
var sizeOfWire int | |||
for { | |||
sizeOfWire++ | |||
wire >>= 7 | |||
if wire == 0 { | |||
break | |||
} | |||
} | |||
iNdEx -= sizeOfWire | |||
skippy, err := skipUpsidedown(data[iNdEx:]) | |||
if err != nil { | |||
return err | |||
} | |||
if skippy < 0 { | |||
return ErrInvalidLengthUpsidedown | |||
} | |||
if (iNdEx + skippy) > l { | |||
return io.ErrUnexpectedEOF | |||
} | |||
m.XXX_unrecognized = append(m.XXX_unrecognized, data[iNdEx:iNdEx+skippy]...) | |||
iNdEx += skippy | |||
} | |||
} | |||
if hasFields[0]&uint64(0x00000001) == 0 { | |||
return new(github_com_golang_protobuf_proto.RequiredNotSetError) | |||
} | |||
if hasFields[0]&uint64(0x00000002) == 0 { | |||
return new(github_com_golang_protobuf_proto.RequiredNotSetError) | |||
} | |||
return nil | |||
} | |||
func (m *BackIndexStoreEntry) Unmarshal(data []byte) error { | |||
var hasFields [1]uint64 | |||
l := len(data) | |||
iNdEx := 0 | |||
for iNdEx < l { | |||
var wire uint64 | |||
for shift := uint(0); ; shift += 7 { | |||
if iNdEx >= l { | |||
return io.ErrUnexpectedEOF | |||
} | |||
b := data[iNdEx] | |||
iNdEx++ | |||
wire |= (uint64(b) & 0x7F) << shift | |||
if b < 0x80 { | |||
break | |||
} | |||
} | |||
fieldNum := int32(wire >> 3) | |||
wireType := int(wire & 0x7) | |||
switch fieldNum { | |||
case 1: | |||
if wireType != 0 { | |||
return fmt.Errorf("proto: wrong wireType = %d for field Field", wireType) | |||
} | |||
var v uint32 | |||
for shift := uint(0); ; shift += 7 { | |||
if iNdEx >= l { | |||
return io.ErrUnexpectedEOF | |||
} | |||
b := data[iNdEx] | |||
iNdEx++ | |||
v |= (uint32(b) & 0x7F) << shift | |||
if b < 0x80 { | |||
break | |||
} | |||
} | |||
m.Field = &v | |||
hasFields[0] |= uint64(0x00000001) | |||
case 2: | |||
if wireType != 0 { | |||
return fmt.Errorf("proto: wrong wireType = %d for field ArrayPositions", wireType) | |||
} | |||
var v uint64 | |||
for shift := uint(0); ; shift += 7 { | |||
if iNdEx >= l { | |||
return io.ErrUnexpectedEOF | |||
} | |||
b := data[iNdEx] | |||
iNdEx++ | |||
v |= (uint64(b) & 0x7F) << shift | |||
if b < 0x80 { | |||
break | |||
} | |||
} | |||
m.ArrayPositions = append(m.ArrayPositions, v) | |||
default: | |||
var sizeOfWire int | |||
for { | |||
sizeOfWire++ | |||
wire >>= 7 | |||
if wire == 0 { | |||
break | |||
} | |||
} | |||
iNdEx -= sizeOfWire | |||
skippy, err := skipUpsidedown(data[iNdEx:]) | |||
if err != nil { | |||
return err | |||
} | |||
if skippy < 0 { | |||
return ErrInvalidLengthUpsidedown | |||
} | |||
if (iNdEx + skippy) > l { | |||
return io.ErrUnexpectedEOF | |||
} | |||
m.XXX_unrecognized = append(m.XXX_unrecognized, data[iNdEx:iNdEx+skippy]...) | |||
iNdEx += skippy | |||
} | |||
} | |||
if hasFields[0]&uint64(0x00000001) == 0 { | |||
return new(github_com_golang_protobuf_proto.RequiredNotSetError) | |||
} | |||
return nil | |||
} | |||
func (m *BackIndexRowValue) Unmarshal(data []byte) error { | |||
l := len(data) | |||
iNdEx := 0 | |||
for iNdEx < l { | |||
var wire uint64 | |||
for shift := uint(0); ; shift += 7 { | |||
if iNdEx >= l { | |||
return io.ErrUnexpectedEOF | |||
} | |||
b := data[iNdEx] | |||
iNdEx++ | |||
wire |= (uint64(b) & 0x7F) << shift | |||
if b < 0x80 { | |||
break | |||
} | |||
} | |||
fieldNum := int32(wire >> 3) | |||
wireType := int(wire & 0x7) | |||
switch fieldNum { | |||
case 1: | |||
if wireType != 2 { | |||
return fmt.Errorf("proto: wrong wireType = %d for field TermEntries", wireType) | |||
} | |||
var msglen int | |||
for shift := uint(0); ; shift += 7 { | |||
if iNdEx >= l { | |||
return io.ErrUnexpectedEOF | |||
} | |||
b := data[iNdEx] | |||
iNdEx++ | |||
msglen |= (int(b) & 0x7F) << shift | |||
if b < 0x80 { | |||
break | |||
} | |||
} | |||
postIndex := iNdEx + msglen | |||
if msglen < 0 { | |||
return ErrInvalidLengthUpsidedown | |||
} | |||
if postIndex > l { | |||
return io.ErrUnexpectedEOF | |||
} | |||
m.TermEntries = append(m.TermEntries, &BackIndexTermEntry{}) | |||
if err := m.TermEntries[len(m.TermEntries)-1].Unmarshal(data[iNdEx:postIndex]); err != nil { | |||
return err | |||
} | |||
iNdEx = postIndex | |||
case 2: | |||
if wireType != 2 { | |||
return fmt.Errorf("proto: wrong wireType = %d for field StoredEntries", wireType) | |||
} | |||
var msglen int | |||
for shift := uint(0); ; shift += 7 { | |||
if iNdEx >= l { | |||
return io.ErrUnexpectedEOF | |||
} | |||
b := data[iNdEx] | |||
iNdEx++ | |||
msglen |= (int(b) & 0x7F) << shift | |||
if b < 0x80 { | |||
break | |||
} | |||
} | |||
postIndex := iNdEx + msglen | |||
if msglen < 0 { | |||
return ErrInvalidLengthUpsidedown | |||
} | |||
if postIndex > l { | |||
return io.ErrUnexpectedEOF | |||
} | |||
m.StoredEntries = append(m.StoredEntries, &BackIndexStoreEntry{}) | |||
if err := m.StoredEntries[len(m.StoredEntries)-1].Unmarshal(data[iNdEx:postIndex]); err != nil { | |||
return err | |||
} | |||
iNdEx = postIndex | |||
default: | |||
var sizeOfWire int | |||
for { | |||
sizeOfWire++ | |||
wire >>= 7 | |||
if wire == 0 { | |||
break | |||
} | |||
} | |||
iNdEx -= sizeOfWire | |||
skippy, err := skipUpsidedown(data[iNdEx:]) | |||
if err != nil { | |||
return err | |||
} | |||
if skippy < 0 { | |||
return ErrInvalidLengthUpsidedown | |||
} | |||
if (iNdEx + skippy) > l { | |||
return io.ErrUnexpectedEOF | |||
} | |||
m.XXX_unrecognized = append(m.XXX_unrecognized, data[iNdEx:iNdEx+skippy]...) | |||
iNdEx += skippy | |||
} | |||
} | |||
return nil | |||
} | |||
func skipUpsidedown(data []byte) (n int, err error) { | |||
l := len(data) | |||
iNdEx := 0 | |||
for iNdEx < l { | |||
var wire uint64 | |||
for shift := uint(0); ; shift += 7 { | |||
if iNdEx >= l { | |||
return 0, io.ErrUnexpectedEOF | |||
} | |||
b := data[iNdEx] | |||
iNdEx++ | |||
wire |= (uint64(b) & 0x7F) << shift | |||
if b < 0x80 { | |||
break | |||
} | |||
} | |||
wireType := int(wire & 0x7) | |||
switch wireType { | |||
case 0: | |||
for { | |||
if iNdEx >= l { | |||
return 0, io.ErrUnexpectedEOF | |||
} | |||
iNdEx++ | |||
if data[iNdEx-1] < 0x80 { | |||
break | |||
} | |||
} | |||
return iNdEx, nil | |||
case 1: | |||
iNdEx += 8 | |||
return iNdEx, nil | |||
case 2: | |||
var length int | |||
for shift := uint(0); ; shift += 7 { | |||
if iNdEx >= l { | |||
return 0, io.ErrUnexpectedEOF | |||
} | |||
b := data[iNdEx] | |||
iNdEx++ | |||
length |= (int(b) & 0x7F) << shift | |||
if b < 0x80 { | |||
break | |||
} | |||
} | |||
iNdEx += length | |||
if length < 0 { | |||
return 0, ErrInvalidLengthUpsidedown | |||
} | |||
return iNdEx, nil | |||
case 3: | |||
for { | |||
var innerWire uint64 | |||
var start int = iNdEx | |||
for shift := uint(0); ; shift += 7 { | |||
if iNdEx >= l { | |||
return 0, io.ErrUnexpectedEOF | |||
} | |||
b := data[iNdEx] | |||
iNdEx++ | |||
innerWire |= (uint64(b) & 0x7F) << shift | |||
if b < 0x80 { | |||
break | |||
} | |||
} | |||
innerWireType := int(innerWire & 0x7) | |||
if innerWireType == 4 { | |||
break | |||
} | |||
next, err := skipUpsidedown(data[start:]) | |||
if err != nil { | |||
return 0, err | |||
} | |||
iNdEx = start + next | |||
} | |||
return iNdEx, nil | |||
case 4: | |||
return iNdEx, nil | |||
case 5: | |||
iNdEx += 4 | |||
return iNdEx, nil | |||
default: | |||
return 0, fmt.Errorf("proto: illegal wireType %d", wireType) | |||
} | |||
} | |||
panic("unreachable") | |||
} | |||
var ( | |||
ErrInvalidLengthUpsidedown = fmt.Errorf("proto: negative length found during unmarshaling") | |||
) | |||
func (m *BackIndexTermEntry) Size() (n int) { | |||
var l int | |||
_ = l | |||
if m.Term != nil { | |||
l = len(*m.Term) | |||
n += 1 + l + sovUpsidedown(uint64(l)) | |||
} | |||
if m.Field != nil { | |||
n += 1 + sovUpsidedown(uint64(*m.Field)) | |||
} | |||
if m.XXX_unrecognized != nil { | |||
n += len(m.XXX_unrecognized) | |||
} | |||
return n | |||
} | |||
func (m *BackIndexStoreEntry) Size() (n int) { | |||
var l int | |||
_ = l | |||
if m.Field != nil { | |||
n += 1 + sovUpsidedown(uint64(*m.Field)) | |||
} | |||
if len(m.ArrayPositions) > 0 { | |||
for _, e := range m.ArrayPositions { | |||
n += 1 + sovUpsidedown(uint64(e)) | |||
} | |||
} | |||
if m.XXX_unrecognized != nil { | |||
n += len(m.XXX_unrecognized) | |||
} | |||
return n | |||
} | |||
func (m *BackIndexRowValue) Size() (n int) { | |||
var l int | |||
_ = l | |||
if len(m.TermEntries) > 0 { | |||
for _, e := range m.TermEntries { | |||
l = e.Size() | |||
n += 1 + l + sovUpsidedown(uint64(l)) | |||
} | |||
} | |||
if len(m.StoredEntries) > 0 { | |||
for _, e := range m.StoredEntries { | |||
l = e.Size() | |||
n += 1 + l + sovUpsidedown(uint64(l)) | |||
} | |||
} | |||
if m.XXX_unrecognized != nil { | |||
n += len(m.XXX_unrecognized) | |||
} | |||
return n | |||
} | |||
func sovUpsidedown(x uint64) (n int) { | |||
for { | |||
n++ | |||
x >>= 7 | |||
if x == 0 { | |||
break | |||
} | |||
} | |||
return n | |||
} | |||
func sozUpsidedown(x uint64) (n int) { | |||
return sovUpsidedown(uint64((x << 1) ^ uint64((int64(x) >> 63)))) | |||
} | |||
func (m *BackIndexTermEntry) Marshal() (data []byte, err error) { | |||
size := m.Size() | |||
data = make([]byte, size) | |||
n, err := m.MarshalTo(data) | |||
if err != nil { | |||
return nil, err | |||
} | |||
return data[:n], nil | |||
} | |||
func (m *BackIndexTermEntry) MarshalTo(data []byte) (n int, err error) { | |||
var i int | |||
_ = i | |||
var l int | |||
_ = l | |||
if m.Term == nil { | |||
return 0, new(github_com_golang_protobuf_proto.RequiredNotSetError) | |||
} else { | |||
data[i] = 0xa | |||
i++ | |||
i = encodeVarintUpsidedown(data, i, uint64(len(*m.Term))) | |||
i += copy(data[i:], *m.Term) | |||
} | |||
if m.Field == nil { | |||
return 0, new(github_com_golang_protobuf_proto.RequiredNotSetError) | |||
} else { | |||
data[i] = 0x10 | |||
i++ | |||
i = encodeVarintUpsidedown(data, i, uint64(*m.Field)) | |||
} | |||
if m.XXX_unrecognized != nil { | |||
i += copy(data[i:], m.XXX_unrecognized) | |||
} | |||
return i, nil | |||
} | |||
func (m *BackIndexStoreEntry) Marshal() (data []byte, err error) { | |||
size := m.Size() | |||
data = make([]byte, size) | |||
n, err := m.MarshalTo(data) | |||
if err != nil { | |||
return nil, err | |||
} | |||
return data[:n], nil | |||
} | |||
func (m *BackIndexStoreEntry) MarshalTo(data []byte) (n int, err error) { | |||
var i int | |||
_ = i | |||
var l int | |||
_ = l | |||
if m.Field == nil { | |||
return 0, new(github_com_golang_protobuf_proto.RequiredNotSetError) | |||
} else { | |||
data[i] = 0x8 | |||
i++ | |||
i = encodeVarintUpsidedown(data, i, uint64(*m.Field)) | |||
} | |||
if len(m.ArrayPositions) > 0 { | |||
for _, num := range m.ArrayPositions { | |||
data[i] = 0x10 | |||
i++ | |||
i = encodeVarintUpsidedown(data, i, uint64(num)) | |||
} | |||
} | |||
if m.XXX_unrecognized != nil { | |||
i += copy(data[i:], m.XXX_unrecognized) | |||
} | |||
return i, nil | |||
} | |||
func (m *BackIndexRowValue) Marshal() (data []byte, err error) { | |||
size := m.Size() | |||
data = make([]byte, size) | |||
n, err := m.MarshalTo(data) | |||
if err != nil { | |||
return nil, err | |||
} | |||
return data[:n], nil | |||
} | |||
func (m *BackIndexRowValue) MarshalTo(data []byte) (n int, err error) { | |||
var i int | |||
_ = i | |||
var l int | |||
_ = l | |||
if len(m.TermEntries) > 0 { | |||
for _, msg := range m.TermEntries { | |||
data[i] = 0xa | |||
i++ | |||
i = encodeVarintUpsidedown(data, i, uint64(msg.Size())) | |||
n, err := msg.MarshalTo(data[i:]) | |||
if err != nil { | |||
return 0, err | |||
} | |||
i += n | |||
} | |||
} | |||
if len(m.StoredEntries) > 0 { | |||
for _, msg := range m.StoredEntries { | |||
data[i] = 0x12 | |||
i++ | |||
i = encodeVarintUpsidedown(data, i, uint64(msg.Size())) | |||
n, err := msg.MarshalTo(data[i:]) | |||
if err != nil { | |||
return 0, err | |||
} | |||
i += n | |||
} | |||
} | |||
if m.XXX_unrecognized != nil { | |||
i += copy(data[i:], m.XXX_unrecognized) | |||
} | |||
return i, nil | |||
} | |||
func encodeFixed64Upsidedown(data []byte, offset int, v uint64) int { | |||
data[offset] = uint8(v) | |||
data[offset+1] = uint8(v >> 8) | |||
data[offset+2] = uint8(v >> 16) | |||
data[offset+3] = uint8(v >> 24) | |||
data[offset+4] = uint8(v >> 32) | |||
data[offset+5] = uint8(v >> 40) | |||
data[offset+6] = uint8(v >> 48) | |||
data[offset+7] = uint8(v >> 56) | |||
return offset + 8 | |||
} | |||
func encodeFixed32Upsidedown(data []byte, offset int, v uint32) int { | |||
data[offset] = uint8(v) | |||
data[offset+1] = uint8(v >> 8) | |||
data[offset+2] = uint8(v >> 16) | |||
data[offset+3] = uint8(v >> 24) | |||
return offset + 4 | |||
} | |||
func encodeVarintUpsidedown(data []byte, offset int, v uint64) int { | |||
for v >= 1<<7 { | |||
data[offset] = uint8(v&0x7f | 0x80) | |||
v >>= 7 | |||
offset++ | |||
} | |||
data[offset] = uint8(v) | |||
return offset + 1 | |||
} |
@@ -0,0 +1,14 @@ | |||
message BackIndexTermEntry { | |||
required string term = 1; | |||
required uint32 field = 2; | |||
} | |||
message BackIndexStoreEntry { | |||
required uint32 field = 1; | |||
repeated uint64 arrayPositions = 2; | |||
} | |||
message BackIndexRowValue { | |||
repeated BackIndexTermEntry termEntries = 1; | |||
repeated BackIndexStoreEntry storedEntries = 2; | |||
} |
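// Wire-format sketch (illustrative): with these definitions, a
// BackIndexTermEntry with term "a" and field 2 encodes as the five bytes
// 0x0a 0x01 'a' 0x10 0x02: tag 0x0a (field 1, length-delimited),
// length 1, the byte 'a', then tag 0x10 (field 2, varint) and the
// value 2, matching the generated Marshal code above.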
@@ -0,0 +1,37 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package bleve | |||
// An IndexAlias is a wrapper around one or more | |||
// Index objects. It has two distinct modes of | |||
// operation. | |||
// 1. When it points to a single index, ALL index | |||
// operations are valid and will be passed through | |||
// to the underlying index. | |||
// 2. When it points to more than one index, the only | |||
// valid operation is Search. In this case the | |||
// search will be performed across all the | |||
// underlying indexes and the results merged. | |||
// Calls to Add/Remove/Swap the underlying indexes | |||
// are atomic, so you can safely change the | |||
// underlying Index objects while other components | |||
// are performing operations. | |||
type IndexAlias interface { | |||
Index | |||
Add(i ...Index) | |||
Remove(i ...Index) | |||
Swap(in, out []Index) | |||
} |
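// A minimal usage sketch (hypothetical; idxA and idxB are assumed to be
// open Index values): in single-index mode every operation passes
// through, while after Add only Search remains valid.
func exampleAliasUsage(idxA, idxB Index) error {
	alias := NewIndexAlias(idxA)
	if err := alias.Index("doc1", map[string]string{"body": "hello"}); err != nil {
		return err
	}
	alias.Add(idxB)
	_, err := alias.Search(NewSearchRequest(NewMatchQuery("hello")))
	return err
}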
@@ -0,0 +1,605 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package bleve | |||
import ( | |||
"sort" | |||
"sync" | |||
"time" | |||
"golang.org/x/net/context" | |||
"github.com/blevesearch/bleve/document" | |||
"github.com/blevesearch/bleve/index" | |||
"github.com/blevesearch/bleve/index/store" | |||
"github.com/blevesearch/bleve/mapping" | |||
"github.com/blevesearch/bleve/search" | |||
) | |||
type indexAliasImpl struct { | |||
name string | |||
indexes []Index | |||
mutex sync.RWMutex | |||
open bool | |||
} | |||
// NewIndexAlias creates a new IndexAlias over the provided | |||
// Index objects. | |||
func NewIndexAlias(indexes ...Index) *indexAliasImpl { | |||
return &indexAliasImpl{ | |||
name: "alias", | |||
indexes: indexes, | |||
open: true, | |||
} | |||
} | |||
func (i *indexAliasImpl) isAliasToSingleIndex() error { | |||
if len(i.indexes) < 1 { | |||
return ErrorAliasEmpty | |||
} else if len(i.indexes) > 1 { | |||
return ErrorAliasMulti | |||
} | |||
return nil | |||
} | |||
func (i *indexAliasImpl) Index(id string, data interface{}) error { | |||
i.mutex.RLock() | |||
defer i.mutex.RUnlock() | |||
if !i.open { | |||
return ErrorIndexClosed | |||
} | |||
err := i.isAliasToSingleIndex() | |||
if err != nil { | |||
return err | |||
} | |||
return i.indexes[0].Index(id, data) | |||
} | |||
func (i *indexAliasImpl) Delete(id string) error { | |||
i.mutex.RLock() | |||
defer i.mutex.RUnlock() | |||
if !i.open { | |||
return ErrorIndexClosed | |||
} | |||
err := i.isAliasToSingleIndex() | |||
if err != nil { | |||
return err | |||
} | |||
return i.indexes[0].Delete(id) | |||
} | |||
func (i *indexAliasImpl) Batch(b *Batch) error { | |||
i.mutex.RLock() | |||
defer i.mutex.RUnlock() | |||
if !i.open { | |||
return ErrorIndexClosed | |||
} | |||
err := i.isAliasToSingleIndex() | |||
if err != nil { | |||
return err | |||
} | |||
return i.indexes[0].Batch(b) | |||
} | |||
func (i *indexAliasImpl) Document(id string) (*document.Document, error) { | |||
i.mutex.RLock() | |||
defer i.mutex.RUnlock() | |||
if !i.open { | |||
return nil, ErrorIndexClosed | |||
} | |||
err := i.isAliasToSingleIndex() | |||
if err != nil { | |||
return nil, err | |||
} | |||
return i.indexes[0].Document(id) | |||
} | |||
func (i *indexAliasImpl) DocCount() (uint64, error) { | |||
i.mutex.RLock() | |||
defer i.mutex.RUnlock() | |||
rv := uint64(0) | |||
if !i.open { | |||
return 0, ErrorIndexClosed | |||
} | |||
for _, index := range i.indexes { | |||
otherCount, err := index.DocCount() | |||
if err == nil { | |||
rv += otherCount | |||
} | |||
// tolerate errors to produce partial counts | |||
} | |||
return rv, nil | |||
} | |||
func (i *indexAliasImpl) Search(req *SearchRequest) (*SearchResult, error) { | |||
return i.SearchInContext(context.Background(), req) | |||
} | |||
func (i *indexAliasImpl) SearchInContext(ctx context.Context, req *SearchRequest) (*SearchResult, error) { | |||
i.mutex.RLock() | |||
defer i.mutex.RUnlock() | |||
if !i.open { | |||
return nil, ErrorIndexClosed | |||
} | |||
if len(i.indexes) < 1 { | |||
return nil, ErrorAliasEmpty | |||
} | |||
// short circuit the simple case | |||
if len(i.indexes) == 1 { | |||
return i.indexes[0].SearchInContext(ctx, req) | |||
} | |||
return MultiSearch(ctx, req, i.indexes...) | |||
} | |||
func (i *indexAliasImpl) Fields() ([]string, error) { | |||
i.mutex.RLock() | |||
defer i.mutex.RUnlock() | |||
if !i.open { | |||
return nil, ErrorIndexClosed | |||
} | |||
err := i.isAliasToSingleIndex() | |||
if err != nil { | |||
return nil, err | |||
} | |||
return i.indexes[0].Fields() | |||
} | |||
func (i *indexAliasImpl) FieldDict(field string) (index.FieldDict, error) { | |||
i.mutex.RLock() | |||
if !i.open { | |||
i.mutex.RUnlock() | |||
return nil, ErrorIndexClosed | |||
} | |||
err := i.isAliasToSingleIndex() | |||
if err != nil { | |||
i.mutex.RUnlock() | |||
return nil, err | |||
} | |||
fieldDict, err := i.indexes[0].FieldDict(field) | |||
if err != nil { | |||
i.mutex.RUnlock() | |||
return nil, err | |||
} | |||
return &indexAliasImplFieldDict{ | |||
index: i, | |||
fieldDict: fieldDict, | |||
}, nil | |||
} | |||
func (i *indexAliasImpl) FieldDictRange(field string, startTerm []byte, endTerm []byte) (index.FieldDict, error) { | |||
i.mutex.RLock() | |||
if !i.open { | |||
i.mutex.RUnlock() | |||
return nil, ErrorIndexClosed | |||
} | |||
err := i.isAliasToSingleIndex() | |||
if err != nil { | |||
i.mutex.RUnlock() | |||
return nil, err | |||
} | |||
fieldDict, err := i.indexes[0].FieldDictRange(field, startTerm, endTerm) | |||
if err != nil { | |||
i.mutex.RUnlock() | |||
return nil, err | |||
} | |||
return &indexAliasImplFieldDict{ | |||
index: i, | |||
fieldDict: fieldDict, | |||
}, nil | |||
} | |||
func (i *indexAliasImpl) FieldDictPrefix(field string, termPrefix []byte) (index.FieldDict, error) { | |||
i.mutex.RLock() | |||
if !i.open { | |||
i.mutex.RUnlock() | |||
return nil, ErrorIndexClosed | |||
} | |||
err := i.isAliasToSingleIndex() | |||
if err != nil { | |||
i.mutex.RUnlock() | |||
return nil, err | |||
} | |||
fieldDict, err := i.indexes[0].FieldDictPrefix(field, termPrefix) | |||
if err != nil { | |||
i.mutex.RUnlock() | |||
return nil, err | |||
} | |||
return &indexAliasImplFieldDict{ | |||
index: i, | |||
fieldDict: fieldDict, | |||
}, nil | |||
} | |||
func (i *indexAliasImpl) Close() error { | |||
i.mutex.Lock() | |||
defer i.mutex.Unlock() | |||
i.open = false | |||
return nil | |||
} | |||
func (i *indexAliasImpl) Mapping() mapping.IndexMapping { | |||
i.mutex.RLock() | |||
defer i.mutex.RUnlock() | |||
if !i.open { | |||
return nil | |||
} | |||
err := i.isAliasToSingleIndex() | |||
if err != nil { | |||
return nil | |||
} | |||
return i.indexes[0].Mapping() | |||
} | |||
func (i *indexAliasImpl) Stats() *IndexStat { | |||
i.mutex.RLock() | |||
defer i.mutex.RUnlock() | |||
if !i.open { | |||
return nil | |||
} | |||
err := i.isAliasToSingleIndex() | |||
if err != nil { | |||
return nil | |||
} | |||
return i.indexes[0].Stats() | |||
} | |||
func (i *indexAliasImpl) StatsMap() map[string]interface{} { | |||
i.mutex.RLock() | |||
defer i.mutex.RUnlock() | |||
if !i.open { | |||
return nil | |||
} | |||
err := i.isAliasToSingleIndex() | |||
if err != nil { | |||
return nil | |||
} | |||
return i.indexes[0].StatsMap() | |||
} | |||
func (i *indexAliasImpl) GetInternal(key []byte) ([]byte, error) { | |||
i.mutex.RLock() | |||
defer i.mutex.RUnlock() | |||
if !i.open { | |||
return nil, ErrorIndexClosed | |||
} | |||
err := i.isAliasToSingleIndex() | |||
if err != nil { | |||
return nil, err | |||
} | |||
return i.indexes[0].GetInternal(key) | |||
} | |||
func (i *indexAliasImpl) SetInternal(key, val []byte) error { | |||
i.mutex.RLock() | |||
defer i.mutex.RUnlock() | |||
if !i.open { | |||
return ErrorIndexClosed | |||
} | |||
err := i.isAliasToSingleIndex() | |||
if err != nil { | |||
return err | |||
} | |||
return i.indexes[0].SetInternal(key, val) | |||
} | |||
func (i *indexAliasImpl) DeleteInternal(key []byte) error { | |||
i.mutex.RLock() | |||
defer i.mutex.RUnlock() | |||
if !i.open { | |||
return ErrorIndexClosed | |||
} | |||
err := i.isAliasToSingleIndex() | |||
if err != nil { | |||
return err | |||
} | |||
return i.indexes[0].DeleteInternal(key) | |||
} | |||
func (i *indexAliasImpl) Advanced() (index.Index, store.KVStore, error) { | |||
i.mutex.RLock() | |||
defer i.mutex.RUnlock() | |||
if !i.open { | |||
return nil, nil, ErrorIndexClosed | |||
} | |||
err := i.isAliasToSingleIndex() | |||
if err != nil { | |||
return nil, nil, err | |||
} | |||
return i.indexes[0].Advanced() | |||
} | |||
func (i *indexAliasImpl) Add(indexes ...Index) { | |||
i.mutex.Lock() | |||
defer i.mutex.Unlock() | |||
i.indexes = append(i.indexes, indexes...) | |||
} | |||
func (i *indexAliasImpl) removeSingle(index Index) { | |||
for pos, in := range i.indexes { | |||
if in == index { | |||
i.indexes = append(i.indexes[:pos], i.indexes[pos+1:]...) | |||
break | |||
} | |||
} | |||
} | |||
func (i *indexAliasImpl) Remove(indexes ...Index) { | |||
i.mutex.Lock() | |||
defer i.mutex.Unlock() | |||
for _, in := range indexes { | |||
i.removeSingle(in) | |||
} | |||
} | |||
func (i *indexAliasImpl) Swap(in, out []Index) { | |||
i.mutex.Lock() | |||
defer i.mutex.Unlock() | |||
// add | |||
i.indexes = append(i.indexes, in...) | |||
// delete | |||
for _, ind := range out { | |||
i.removeSingle(ind) | |||
} | |||
} | |||
// createChildSearchRequest creates a separate | |||
// request from the original. | |||
// For now a copy is made to avoid a data race on the req structure. | |||
// TODO: disable highlight/field load on child | |||
// requests, and add code to do this only on | |||
// the actual final results. | |||
// Perhaps that part needs to be optional, | |||
// as it could be slower in remote usages. | |||
func createChildSearchRequest(req *SearchRequest) *SearchRequest { | |||
rv := SearchRequest{ | |||
Query: req.Query, | |||
Size: req.Size + req.From, | |||
From: 0, | |||
Highlight: req.Highlight, | |||
Fields: req.Fields, | |||
Facets: req.Facets, | |||
Explain: req.Explain, | |||
Sort: req.Sort, | |||
} | |||
return &rv | |||
} | |||
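// Paging example (illustrative): with req.From = 10 and req.Size = 5,
// every child request asks for the top 15 hits (Size: 15, From: 0);
// after merging and sorting, MultiSearch slices hits[10:15] to honor
// the original paging.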
type asyncSearchResult struct { | |||
Name string | |||
Result *SearchResult | |||
Err error | |||
} | |||
// MultiSearch executes a SearchRequest across multiple Index objects, | |||
// then merges the results. The indexes must honor any ctx deadline. | |||
func MultiSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*SearchResult, error) { | |||
searchStart := time.Now() | |||
asyncResults := make(chan *asyncSearchResult, len(indexes)) | |||
	// run search on each index in a separate goroutine | |||
var waitGroup sync.WaitGroup | |||
var searchChildIndex = func(in Index, childReq *SearchRequest) { | |||
rv := asyncSearchResult{Name: in.Name()} | |||
rv.Result, rv.Err = in.SearchInContext(ctx, childReq) | |||
asyncResults <- &rv | |||
waitGroup.Done() | |||
} | |||
waitGroup.Add(len(indexes)) | |||
for _, in := range indexes { | |||
go searchChildIndex(in, createChildSearchRequest(req)) | |||
} | |||
	// on another goroutine, close the channel once all searches finish | |||
go func() { | |||
waitGroup.Wait() | |||
close(asyncResults) | |||
}() | |||
var sr *SearchResult | |||
indexErrors := make(map[string]error) | |||
for asr := range asyncResults { | |||
if asr.Err == nil { | |||
if sr == nil { | |||
// first result | |||
sr = asr.Result | |||
} else { | |||
// merge with previous | |||
sr.Merge(asr.Result) | |||
} | |||
} else { | |||
indexErrors[asr.Name] = asr.Err | |||
} | |||
} | |||
	// merge just concatenated all the hits; | |||
	// now let's clean them up | |||
// handle case where no results were successful | |||
if sr == nil { | |||
sr = &SearchResult{ | |||
Status: &SearchStatus{ | |||
Errors: make(map[string]error), | |||
}, | |||
} | |||
} | |||
// sort all hits with the requested order | |||
if len(req.Sort) > 0 { | |||
sorter := newMultiSearchHitSorter(req.Sort, sr.Hits) | |||
sort.Sort(sorter) | |||
} | |||
// now skip over the correct From | |||
if req.From > 0 && len(sr.Hits) > req.From { | |||
sr.Hits = sr.Hits[req.From:] | |||
} else if req.From > 0 { | |||
sr.Hits = search.DocumentMatchCollection{} | |||
} | |||
// now trim to the correct size | |||
if req.Size > 0 && len(sr.Hits) > req.Size { | |||
sr.Hits = sr.Hits[0:req.Size] | |||
} | |||
// fix up facets | |||
for name, fr := range req.Facets { | |||
sr.Facets.Fixup(name, fr.Size) | |||
} | |||
// fix up original request | |||
sr.Request = req | |||
searchDuration := time.Since(searchStart) | |||
sr.Took = searchDuration | |||
// fix up errors | |||
if len(indexErrors) > 0 { | |||
if sr.Status.Errors == nil { | |||
sr.Status.Errors = make(map[string]error) | |||
} | |||
for indexName, indexErr := range indexErrors { | |||
sr.Status.Errors[indexName] = indexErr | |||
sr.Status.Total++ | |||
sr.Status.Failed++ | |||
} | |||
} | |||
return sr, nil | |||
} | |||
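// A minimal sketch (hypothetical usage): MultiSearch is also callable
// directly when the caller already holds several Index values.
//
//	res, err := MultiSearch(context.Background(), req, idxA, idxB)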
func (i *indexAliasImpl) NewBatch() *Batch { | |||
i.mutex.RLock() | |||
defer i.mutex.RUnlock() | |||
if !i.open { | |||
return nil | |||
} | |||
err := i.isAliasToSingleIndex() | |||
if err != nil { | |||
return nil | |||
} | |||
return i.indexes[0].NewBatch() | |||
} | |||
func (i *indexAliasImpl) Name() string { | |||
return i.name | |||
} | |||
func (i *indexAliasImpl) SetName(name string) { | |||
i.name = name | |||
} | |||
type indexAliasImplFieldDict struct { | |||
index *indexAliasImpl | |||
fieldDict index.FieldDict | |||
} | |||
func (f *indexAliasImplFieldDict) Next() (*index.DictEntry, error) { | |||
return f.fieldDict.Next() | |||
} | |||
func (f *indexAliasImplFieldDict) Close() error { | |||
defer f.index.mutex.RUnlock() | |||
return f.fieldDict.Close() | |||
} | |||
type multiSearchHitSorter struct { | |||
hits search.DocumentMatchCollection | |||
sort search.SortOrder | |||
cachedScoring []bool | |||
cachedDesc []bool | |||
} | |||
func newMultiSearchHitSorter(sort search.SortOrder, hits search.DocumentMatchCollection) *multiSearchHitSorter { | |||
return &multiSearchHitSorter{ | |||
sort: sort, | |||
hits: hits, | |||
cachedScoring: sort.CacheIsScore(), | |||
cachedDesc: sort.CacheDescending(), | |||
} | |||
} | |||
func (m *multiSearchHitSorter) Len() int { return len(m.hits) } | |||
func (m *multiSearchHitSorter) Swap(i, j int) { m.hits[i], m.hits[j] = m.hits[j], m.hits[i] } | |||
func (m *multiSearchHitSorter) Less(i, j int) bool { | |||
c := m.sort.Compare(m.cachedScoring, m.cachedDesc, m.hits[i], m.hits[j]) | |||
return c < 0 | |||
} |
@@ -0,0 +1,729 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package bleve | |||
import ( | |||
"encoding/json" | |||
"fmt" | |||
"os" | |||
"sync" | |||
"sync/atomic" | |||
"time" | |||
"golang.org/x/net/context" | |||
"github.com/blevesearch/bleve/document" | |||
"github.com/blevesearch/bleve/index" | |||
"github.com/blevesearch/bleve/index/store" | |||
"github.com/blevesearch/bleve/index/upsidedown" | |||
"github.com/blevesearch/bleve/mapping" | |||
"github.com/blevesearch/bleve/registry" | |||
"github.com/blevesearch/bleve/search" | |||
"github.com/blevesearch/bleve/search/collector" | |||
"github.com/blevesearch/bleve/search/facet" | |||
"github.com/blevesearch/bleve/search/highlight" | |||
) | |||
type indexImpl struct { | |||
path string | |||
name string | |||
meta *indexMeta | |||
i index.Index | |||
m mapping.IndexMapping | |||
mutex sync.RWMutex | |||
open bool | |||
stats *IndexStat | |||
} | |||
const storePath = "store" | |||
var mappingInternalKey = []byte("_mapping") | |||
func indexStorePath(path string) string { | |||
return path + string(os.PathSeparator) + storePath | |||
} | |||
func newIndexUsing(path string, mapping mapping.IndexMapping, indexType string, kvstore string, kvconfig map[string]interface{}) (*indexImpl, error) { | |||
// first validate the mapping | |||
err := mapping.Validate() | |||
if err != nil { | |||
return nil, err | |||
} | |||
if kvconfig == nil { | |||
kvconfig = map[string]interface{}{} | |||
} | |||
if kvstore == "" { | |||
return nil, fmt.Errorf("bleve not configured for file based indexing") | |||
} | |||
rv := indexImpl{ | |||
path: path, | |||
name: path, | |||
m: mapping, | |||
meta: newIndexMeta(indexType, kvstore, kvconfig), | |||
} | |||
rv.stats = &IndexStat{i: &rv} | |||
// at this point there is hope that we can be successful, so save index meta | |||
if path != "" { | |||
err = rv.meta.Save(path) | |||
if err != nil { | |||
return nil, err | |||
} | |||
kvconfig["create_if_missing"] = true | |||
kvconfig["error_if_exists"] = true | |||
kvconfig["path"] = indexStorePath(path) | |||
} else { | |||
kvconfig["path"] = "" | |||
} | |||
// open the index | |||
indexTypeConstructor := registry.IndexTypeConstructorByName(rv.meta.IndexType) | |||
if indexTypeConstructor == nil { | |||
return nil, ErrorUnknownIndexType | |||
} | |||
rv.i, err = indexTypeConstructor(rv.meta.Storage, kvconfig, Config.analysisQueue) | |||
if err != nil { | |||
return nil, err | |||
} | |||
err = rv.i.Open() | |||
if err != nil { | |||
if err == index.ErrorUnknownStorageType { | |||
return nil, ErrorUnknownStorageType | |||
} | |||
return nil, err | |||
} | |||
// now persist the mapping | |||
mappingBytes, err := json.Marshal(mapping) | |||
if err != nil { | |||
return nil, err | |||
} | |||
err = rv.i.SetInternal(mappingInternalKey, mappingBytes) | |||
if err != nil { | |||
return nil, err | |||
} | |||
// mark the index as open | |||
rv.mutex.Lock() | |||
defer rv.mutex.Unlock() | |||
rv.open = true | |||
indexStats.Register(&rv) | |||
return &rv, nil | |||
} | |||
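// Call-path sketch (an assumption based on bleve's public API): this
// constructor backs NewUsing, e.g.
//
//	idx, err := NewUsing("example.bleve", NewIndexMapping(),
//		upsidedown.Name, Config.DefaultKVStore, nil)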
func openIndexUsing(path string, runtimeConfig map[string]interface{}) (rv *indexImpl, err error) { | |||
rv = &indexImpl{ | |||
path: path, | |||
name: path, | |||
} | |||
rv.stats = &IndexStat{i: rv} | |||
rv.meta, err = openIndexMeta(path) | |||
if err != nil { | |||
return nil, err | |||
} | |||
// backwards compatibility if index type is missing | |||
if rv.meta.IndexType == "" { | |||
rv.meta.IndexType = upsidedown.Name | |||
} | |||
storeConfig := rv.meta.Config | |||
if storeConfig == nil { | |||
storeConfig = map[string]interface{}{} | |||
} | |||
storeConfig["path"] = indexStorePath(path) | |||
storeConfig["create_if_missing"] = false | |||
storeConfig["error_if_exists"] = false | |||
for rck, rcv := range runtimeConfig { | |||
storeConfig[rck] = rcv | |||
} | |||
// open the index | |||
indexTypeConstructor := registry.IndexTypeConstructorByName(rv.meta.IndexType) | |||
if indexTypeConstructor == nil { | |||
return nil, ErrorUnknownIndexType | |||
} | |||
rv.i, err = indexTypeConstructor(rv.meta.Storage, storeConfig, Config.analysisQueue) | |||
if err != nil { | |||
return nil, err | |||
} | |||
err = rv.i.Open() | |||
if err != nil { | |||
if err == index.ErrorUnknownStorageType { | |||
return nil, ErrorUnknownStorageType | |||
} | |||
return nil, err | |||
} | |||
// now load the mapping | |||
indexReader, err := rv.i.Reader() | |||
if err != nil { | |||
return nil, err | |||
} | |||
defer func() { | |||
if cerr := indexReader.Close(); cerr != nil && err == nil { | |||
err = cerr | |||
} | |||
}() | |||
mappingBytes, err := indexReader.GetInternal(mappingInternalKey) | |||
if err != nil { | |||
return nil, err | |||
} | |||
var im *mapping.IndexMappingImpl | |||
err = json.Unmarshal(mappingBytes, &im) | |||
if err != nil { | |||
return nil, fmt.Errorf("error parsing mapping JSON: %v\nmapping contents:\n%s", err, string(mappingBytes)) | |||
} | |||
// mark the index as open | |||
rv.mutex.Lock() | |||
defer rv.mutex.Unlock() | |||
rv.open = true | |||
// validate the mapping | |||
err = im.Validate() | |||
if err != nil { | |||
		// note: even if the mapping is invalid, | |||
		// we still return an open, usable index | |||
return rv, err | |||
} | |||
rv.m = im | |||
indexStats.Register(rv) | |||
return rv, err | |||
} | |||
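// Call-path sketch (an assumption based on bleve's public API): Open
// delegates here for existing on-disk indexes, e.g.
//
//	idx, err := Open("example.bleve")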
// Advanced returns implementation internals | |||
// necessary ONLY for advanced usage. | |||
func (i *indexImpl) Advanced() (index.Index, store.KVStore, error) { | |||
s, err := i.i.Advanced() | |||
if err != nil { | |||
return nil, nil, err | |||
} | |||
return i.i, s, nil | |||
} | |||
// Mapping returns the IndexMapping in use by this | |||
// Index. | |||
func (i *indexImpl) Mapping() mapping.IndexMapping { | |||
return i.m | |||
} | |||
// Index the object with the specified identifier. | |||
// The IndexMapping for this index will determine | |||
// how the object is indexed. | |||
func (i *indexImpl) Index(id string, data interface{}) (err error) { | |||
if id == "" { | |||
return ErrorEmptyID | |||
} | |||
i.mutex.RLock() | |||
defer i.mutex.RUnlock() | |||
if !i.open { | |||
return ErrorIndexClosed | |||
} | |||
doc := document.NewDocument(id) | |||
err = i.m.MapDocument(doc, data) | |||
if err != nil { | |||
return | |||
} | |||
err = i.i.Update(doc) | |||
return | |||
} | |||
// Delete entries for the specified identifier from | |||
// the index. | |||
func (i *indexImpl) Delete(id string) (err error) { | |||
if id == "" { | |||
return ErrorEmptyID | |||
} | |||
i.mutex.RLock() | |||
defer i.mutex.RUnlock() | |||
if !i.open { | |||
return ErrorIndexClosed | |||
} | |||
err = i.i.Delete(id) | |||
return | |||
} | |||
// Batch executes multiple Index and Delete | |||
// operations at the same time. There are often | |||
// significant performance benefits when performing | |||
// operations in a batch. | |||
func (i *indexImpl) Batch(b *Batch) error { | |||
i.mutex.RLock() | |||
defer i.mutex.RUnlock() | |||
if !i.open { | |||
return ErrorIndexClosed | |||
} | |||
return i.i.Batch(b.internal) | |||
} | |||
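// Usage sketch (hypothetical ids and data): batching amortizes the
// per-operation overhead described above.
//
//	b := idx.NewBatch()
//	_ = b.Index("doc1", data)
//	b.Delete("doc2")
//	err := idx.Batch(b)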
// Document is used to find the values of all the | |||
// stored fields for a document in the index. These | |||
// stored fields are put back into a Document object | |||
// and returned. | |||
func (i *indexImpl) Document(id string) (doc *document.Document, err error) { | |||
i.mutex.RLock() | |||
defer i.mutex.RUnlock() | |||
if !i.open { | |||
return nil, ErrorIndexClosed | |||
} | |||
indexReader, err := i.i.Reader() | |||
if err != nil { | |||
return nil, err | |||
} | |||
defer func() { | |||
if cerr := indexReader.Close(); err == nil && cerr != nil { | |||
err = cerr | |||
} | |||
}() | |||
doc, err = indexReader.Document(id) | |||
if err != nil { | |||
return nil, err | |||
} | |||
return doc, nil | |||
} | |||
// DocCount returns the number of documents in the | |||
// index. | |||
func (i *indexImpl) DocCount() (count uint64, err error) { | |||
i.mutex.RLock() | |||
defer i.mutex.RUnlock() | |||
if !i.open { | |||
return 0, ErrorIndexClosed | |||
} | |||
	// open a reader to count the documents | |||
indexReader, err := i.i.Reader() | |||
if err != nil { | |||
return 0, fmt.Errorf("error opening index reader %v", err) | |||
} | |||
defer func() { | |||
if cerr := indexReader.Close(); err == nil && cerr != nil { | |||
err = cerr | |||
} | |||
}() | |||
count, err = indexReader.DocCount() | |||
return | |||
} | |||
// Search executes a search request operation. | |||
// Returns a SearchResult object or an error. | |||
func (i *indexImpl) Search(req *SearchRequest) (sr *SearchResult, err error) { | |||
return i.SearchInContext(context.Background(), req) | |||
} | |||
// SearchInContext executes a search request operation within the provided | |||
// Context. Returns a SearchResult object or an error. | |||
func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr *SearchResult, err error) { | |||
i.mutex.RLock() | |||
defer i.mutex.RUnlock() | |||
searchStart := time.Now() | |||
if !i.open { | |||
return nil, ErrorIndexClosed | |||
} | |||
collector := collector.NewTopNCollector(req.Size, req.From, req.Sort) | |||
// open a reader for this search | |||
indexReader, err := i.i.Reader() | |||
if err != nil { | |||
return nil, fmt.Errorf("error opening index reader %v", err) | |||
} | |||
defer func() { | |||
if cerr := indexReader.Close(); err == nil && cerr != nil { | |||
err = cerr | |||
} | |||
}() | |||
searcher, err := req.Query.Searcher(indexReader, i.m, req.Explain) | |||
if err != nil { | |||
return nil, err | |||
} | |||
defer func() { | |||
if serr := searcher.Close(); err == nil && serr != nil { | |||
err = serr | |||
} | |||
}() | |||
if req.Facets != nil { | |||
facetsBuilder := search.NewFacetsBuilder(indexReader) | |||
for facetName, facetRequest := range req.Facets { | |||
if facetRequest.NumericRanges != nil { | |||
// build numeric range facet | |||
facetBuilder := facet.NewNumericFacetBuilder(facetRequest.Field, facetRequest.Size) | |||
for _, nr := range facetRequest.NumericRanges { | |||
facetBuilder.AddRange(nr.Name, nr.Min, nr.Max) | |||
} | |||
facetsBuilder.Add(facetName, facetBuilder) | |||
} else if facetRequest.DateTimeRanges != nil { | |||
// build date range facet | |||
facetBuilder := facet.NewDateTimeFacetBuilder(facetRequest.Field, facetRequest.Size) | |||
dateTimeParser := i.m.DateTimeParserNamed("") | |||
for _, dr := range facetRequest.DateTimeRanges { | |||
start, end := dr.ParseDates(dateTimeParser) | |||
facetBuilder.AddRange(dr.Name, start, end) | |||
} | |||
facetsBuilder.Add(facetName, facetBuilder) | |||
} else { | |||
// build terms facet | |||
facetBuilder := facet.NewTermsFacetBuilder(facetRequest.Field, facetRequest.Size) | |||
facetsBuilder.Add(facetName, facetBuilder) | |||
} | |||
} | |||
collector.SetFacetsBuilder(facetsBuilder) | |||
} | |||
err = collector.Collect(ctx, searcher, indexReader) | |||
if err != nil { | |||
return nil, err | |||
} | |||
hits := collector.Results() | |||
var highlighter highlight.Highlighter | |||
if req.Highlight != nil { | |||
// get the right highlighter | |||
highlighter, err = Config.Cache.HighlighterNamed(Config.DefaultHighlighter) | |||
if err != nil { | |||
return nil, err | |||
} | |||
if req.Highlight.Style != nil { | |||
highlighter, err = Config.Cache.HighlighterNamed(*req.Highlight.Style) | |||
if err != nil { | |||
return nil, err | |||
} | |||
} | |||
if highlighter == nil { | |||
return nil, fmt.Errorf("no highlighter named `%s` registered", *req.Highlight.Style) | |||
} | |||
} | |||
for _, hit := range hits { | |||
if len(req.Fields) > 0 || highlighter != nil { | |||
doc, err := indexReader.Document(hit.ID) | |||
if err == nil && doc != nil { | |||
if len(req.Fields) > 0 { | |||
for _, f := range req.Fields { | |||
for _, docF := range doc.Fields { | |||
if f == "*" || docF.Name() == f { | |||
var value interface{} | |||
switch docF := docF.(type) { | |||
case *document.TextField: | |||
value = string(docF.Value()) | |||
case *document.NumericField: | |||
num, err := docF.Number() | |||
if err == nil { | |||
value = num | |||
} | |||
case *document.DateTimeField: | |||
datetime, err := docF.DateTime() | |||
if err == nil { | |||
value = datetime.Format(time.RFC3339) | |||
} | |||
case *document.BooleanField: | |||
boolean, err := docF.Boolean() | |||
if err == nil { | |||
value = boolean | |||
} | |||
} | |||
if value != nil { | |||
hit.AddFieldValue(docF.Name(), value) | |||
} | |||
} | |||
} | |||
} | |||
} | |||
if highlighter != nil { | |||
highlightFields := req.Highlight.Fields | |||
if highlightFields == nil { | |||
// add all fields with matches | |||
highlightFields = make([]string, 0, len(hit.Locations)) | |||
for k := range hit.Locations { | |||
highlightFields = append(highlightFields, k) | |||
} | |||
} | |||
for _, hf := range highlightFields { | |||
highlighter.BestFragmentsInField(hit, doc, hf, 1) | |||
} | |||
} | |||
} else if doc == nil { | |||
// unexpected case, a doc ID that was found as a search hit | |||
// was unable to be found during document lookup | |||
return nil, ErrorIndexReadInconsistency | |||
} | |||
} | |||
if i.name != "" { | |||
hit.Index = i.name | |||
} | |||
} | |||
atomic.AddUint64(&i.stats.searches, 1) | |||
searchDuration := time.Since(searchStart) | |||
atomic.AddUint64(&i.stats.searchTime, uint64(searchDuration)) | |||
if Config.SlowSearchLogThreshold > 0 && | |||
searchDuration > Config.SlowSearchLogThreshold { | |||
logger.Printf("slow search took %s - %v", searchDuration, req) | |||
} | |||
return &SearchResult{ | |||
Status: &SearchStatus{ | |||
Total: 1, | |||
Failed: 0, | |||
Successful: 1, | |||
Errors: make(map[string]error), | |||
}, | |||
Request: req, | |||
Hits: hits, | |||
Total: collector.Total(), | |||
MaxScore: collector.MaxScore(), | |||
Took: searchDuration, | |||
Facets: collector.FacetResults(), | |||
}, nil | |||
} | |||
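The method above is the core search path: it opens a reader, collects hits, then optionally loads stored fields, highlights, and computes facets. A minimal sketch of driving it through bleve's public API (the index path, query text, and facet field are hypothetical):

```go
package main

import (
	"fmt"
	"log"

	"github.com/blevesearch/bleve"
)

func main() {
	// open an existing index; "example.bleve" is a hypothetical path
	index, err := bleve.Open("example.bleve")
	if err != nil {
		log.Fatal(err)
	}
	defer index.Close()

	req := bleve.NewSearchRequest(bleve.NewMatchQuery("crash"))
	req.Fields = []string{"*"}           // load every stored field per hit
	req.Highlight = bleve.NewHighlight() // use the configured default highlighter
	req.AddFacet("by_state", bleve.NewFacetRequest("state", 3))

	res, err := index.Search(req)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("%d hits in %s\n", res.Total, res.Took)
}
```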
// Fields returns the name of all the fields this | |||
// Index has operated on. | |||
func (i *indexImpl) Fields() (fields []string, err error) { | |||
i.mutex.RLock() | |||
defer i.mutex.RUnlock() | |||
if !i.open { | |||
return nil, ErrorIndexClosed | |||
} | |||
indexReader, err := i.i.Reader() | |||
if err != nil { | |||
return nil, err | |||
} | |||
defer func() { | |||
if cerr := indexReader.Close(); err == nil && cerr != nil { | |||
err = cerr | |||
} | |||
}() | |||
fields, err = indexReader.Fields() | |||
if err != nil { | |||
return nil, err | |||
} | |||
return fields, nil | |||
} | |||
func (i *indexImpl) FieldDict(field string) (index.FieldDict, error) { | |||
i.mutex.RLock() | |||
if !i.open { | |||
i.mutex.RUnlock() | |||
return nil, ErrorIndexClosed | |||
} | |||
indexReader, err := i.i.Reader() | |||
if err != nil { | |||
i.mutex.RUnlock() | |||
return nil, err | |||
} | |||
fieldDict, err := indexReader.FieldDict(field) | |||
if err != nil { | |||
i.mutex.RUnlock() | |||
return nil, err | |||
} | |||
return &indexImplFieldDict{ | |||
index: i, | |||
indexReader: indexReader, | |||
fieldDict: fieldDict, | |||
}, nil | |||
} | |||
func (i *indexImpl) FieldDictRange(field string, startTerm []byte, endTerm []byte) (index.FieldDict, error) { | |||
i.mutex.RLock() | |||
if !i.open { | |||
i.mutex.RUnlock() | |||
return nil, ErrorIndexClosed | |||
} | |||
indexReader, err := i.i.Reader() | |||
if err != nil { | |||
i.mutex.RUnlock() | |||
return nil, err | |||
} | |||
fieldDict, err := indexReader.FieldDictRange(field, startTerm, endTerm) | |||
if err != nil { | |||
i.mutex.RUnlock() | |||
return nil, err | |||
} | |||
return &indexImplFieldDict{ | |||
index: i, | |||
indexReader: indexReader, | |||
fieldDict: fieldDict, | |||
}, nil | |||
} | |||
func (i *indexImpl) FieldDictPrefix(field string, termPrefix []byte) (index.FieldDict, error) { | |||
i.mutex.RLock() | |||
if !i.open { | |||
i.mutex.RUnlock() | |||
return nil, ErrorIndexClosed | |||
} | |||
indexReader, err := i.i.Reader() | |||
if err != nil { | |||
i.mutex.RUnlock() | |||
return nil, err | |||
} | |||
fieldDict, err := indexReader.FieldDictPrefix(field, termPrefix) | |||
if err != nil { | |||
i.mutex.RUnlock() | |||
return nil, err | |||
} | |||
return &indexImplFieldDict{ | |||
index: i, | |||
indexReader: indexReader, | |||
fieldDict: fieldDict, | |||
}, nil | |||
} | |||
func (i *indexImpl) Close() error { | |||
i.mutex.Lock() | |||
defer i.mutex.Unlock() | |||
indexStats.UnRegister(i) | |||
i.open = false | |||
return i.i.Close() | |||
} | |||
func (i *indexImpl) Stats() *IndexStat { | |||
return i.stats | |||
} | |||
func (i *indexImpl) StatsMap() map[string]interface{} { | |||
return i.stats.statsMap() | |||
} | |||
func (i *indexImpl) GetInternal(key []byte) (val []byte, err error) { | |||
i.mutex.RLock() | |||
defer i.mutex.RUnlock() | |||
if !i.open { | |||
return nil, ErrorIndexClosed | |||
} | |||
reader, err := i.i.Reader() | |||
if err != nil { | |||
return nil, err | |||
} | |||
defer func() { | |||
if cerr := reader.Close(); err == nil && cerr != nil { | |||
err = cerr | |||
} | |||
}() | |||
val, err = reader.GetInternal(key) | |||
if err != nil { | |||
return nil, err | |||
} | |||
return val, nil | |||
} | |||
func (i *indexImpl) SetInternal(key, val []byte) error { | |||
i.mutex.RLock() | |||
defer i.mutex.RUnlock() | |||
if !i.open { | |||
return ErrorIndexClosed | |||
} | |||
return i.i.SetInternal(key, val) | |||
} | |||
func (i *indexImpl) DeleteInternal(key []byte) error { | |||
i.mutex.RLock() | |||
defer i.mutex.RUnlock() | |||
if !i.open { | |||
return ErrorIndexClosed | |||
} | |||
return i.i.DeleteInternal(key) | |||
} | |||
// NewBatch creates a new empty batch. | |||
func (i *indexImpl) NewBatch() *Batch { | |||
return &Batch{ | |||
index: i, | |||
internal: index.NewBatch(), | |||
} | |||
} | |||
func (i *indexImpl) Name() string { | |||
return i.name | |||
} | |||
func (i *indexImpl) SetName(name string) { | |||
indexStats.UnRegister(i) | |||
i.name = name | |||
indexStats.Register(i) | |||
} | |||
type indexImplFieldDict struct { | |||
index *indexImpl | |||
indexReader index.IndexReader | |||
fieldDict index.FieldDict | |||
} | |||
func (f *indexImplFieldDict) Next() (*index.DictEntry, error) { | |||
return f.fieldDict.Next() | |||
} | |||
func (f *indexImplFieldDict) Close() error { | |||
defer f.index.mutex.RUnlock() | |||
err := f.fieldDict.Close() | |||
if err != nil { | |||
return err | |||
} | |||
return f.indexReader.Close() | |||
} |
@@ -0,0 +1,96 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package bleve | |||
import ( | |||
"encoding/json" | |||
"io/ioutil" | |||
"os" | |||
"github.com/blevesearch/bleve/index/upsidedown" | |||
) | |||
const metaFilename = "index_meta.json" | |||
type indexMeta struct { | |||
Storage string `json:"storage"` | |||
IndexType string `json:"index_type"` | |||
Config map[string]interface{} `json:"config,omitempty"` | |||
} | |||
func newIndexMeta(indexType string, storage string, config map[string]interface{}) *indexMeta { | |||
return &indexMeta{ | |||
IndexType: indexType, | |||
Storage: storage, | |||
Config: config, | |||
} | |||
} | |||
func openIndexMeta(path string) (*indexMeta, error) { | |||
if _, err := os.Stat(path); os.IsNotExist(err) { | |||
return nil, ErrorIndexPathDoesNotExist | |||
} | |||
indexMetaPath := indexMetaPath(path) | |||
metaBytes, err := ioutil.ReadFile(indexMetaPath) | |||
if err != nil { | |||
return nil, ErrorIndexMetaMissing | |||
} | |||
var im indexMeta | |||
err = json.Unmarshal(metaBytes, &im) | |||
if err != nil { | |||
return nil, ErrorIndexMetaCorrupt | |||
} | |||
if im.IndexType == "" { | |||
im.IndexType = upsidedown.Name | |||
} | |||
return &im, nil | |||
} | |||
func (i *indexMeta) Save(path string) (err error) { | |||
indexMetaPath := indexMetaPath(path) | |||
// ensure any necessary parent directories exist | |||
err = os.MkdirAll(path, 0700) | |||
if err != nil { | |||
if os.IsExist(err) { | |||
return ErrorIndexPathExists | |||
} | |||
return err | |||
} | |||
metaBytes, err := json.Marshal(i) | |||
if err != nil { | |||
return err | |||
} | |||
indexMetaFile, err := os.OpenFile(indexMetaPath, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0666) | |||
if err != nil { | |||
if os.IsExist(err) { | |||
return ErrorIndexPathExists | |||
} | |||
return err | |||
} | |||
defer func() { | |||
if ierr := indexMetaFile.Close(); err == nil && ierr != nil { | |||
err = ierr | |||
} | |||
}() | |||
_, err = indexMetaFile.Write(metaBytes) | |||
if err != nil { | |||
return err | |||
} | |||
return nil | |||
} | |||
func indexMetaPath(path string) string { | |||
return path + string(os.PathSeparator) + metaFilename | |||
} |
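The meta file records which index type and KV store an index was created with, so reopening can select the right implementations. A sketch of the round trip, written as if inside package bleve since these helpers are unexported; the path and the "boltdb" store name are assumptions:

```go
package bleve

import "github.com/blevesearch/bleve/index/upsidedown"

// exampleMetaRoundTrip is a sketch using the unexported helpers above
func exampleMetaRoundTrip() error {
	meta := newIndexMeta(upsidedown.Name, "boltdb", nil)
	// writes /tmp/example.bleve/index_meta.json; fails with
	// ErrorIndexPathExists if that file is already present
	if err := meta.Save("/tmp/example.bleve"); err != nil {
		return err
	}
	reloaded, err := openIndexMeta("/tmp/example.bleve")
	if err != nil {
		return err
	}
	_ = reloaded.Storage // "boltdb"; IndexType round-trips as upsidedown.Name
	return nil
}
```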
@@ -0,0 +1,75 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package bleve | |||
import ( | |||
"encoding/json" | |||
"sync" | |||
"sync/atomic" | |||
) | |||
type IndexStat struct { | |||
searches uint64 | |||
searchTime uint64 | |||
i *indexImpl | |||
} | |||
func (is *IndexStat) statsMap() map[string]interface{} { | |||
m := map[string]interface{}{} | |||
m["index"] = is.i.i.StatsMap() | |||
m["searches"] = atomic.LoadUint64(&is.searches) | |||
m["search_time"] = atomic.LoadUint64(&is.searchTime) | |||
return m | |||
} | |||
func (is *IndexStat) MarshalJSON() ([]byte, error) { | |||
m := is.statsMap() | |||
return json.Marshal(m) | |||
} | |||
type IndexStats struct { | |||
indexes map[string]*IndexStat | |||
mutex sync.RWMutex | |||
} | |||
func NewIndexStats() *IndexStats { | |||
return &IndexStats{ | |||
indexes: make(map[string]*IndexStat), | |||
} | |||
} | |||
func (i *IndexStats) Register(index Index) { | |||
i.mutex.Lock() | |||
defer i.mutex.Unlock() | |||
i.indexes[index.Name()] = index.Stats() | |||
} | |||
func (i *IndexStats) UnRegister(index Index) { | |||
i.mutex.Lock() | |||
defer i.mutex.Unlock() | |||
delete(i.indexes, index.Name()) | |||
} | |||
func (i *IndexStats) String() string { | |||
i.mutex.RLock() | |||
defer i.mutex.RUnlock() | |||
bytes, err := json.Marshal(i.indexes) | |||
if err != nil { | |||
return "error marshaling stats" | |||
} | |||
return string(bytes) | |||
} | |||
var indexStats *IndexStats |
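Each IndexStat carries atomically updated counters: every search increments "searches" and adds its duration to "search_time", while "index" nests the underlying store's own stats. A sketch of dumping them, relying on the MarshalJSON implementation above (package name hypothetical):

```go
package example

import (
	"encoding/json"
	"fmt"
	"log"

	"github.com/blevesearch/bleve"
)

// printStats pretty-prints an index's counters
func printStats(index bleve.Index) {
	b, err := json.MarshalIndent(index.Stats(), "", "  ")
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(string(b))
}
```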
@@ -0,0 +1,61 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package bleve | |||
import "github.com/blevesearch/bleve/mapping" | |||
// NewIndexMapping creates a new IndexMapping that will use all the default indexing rules | |||
func NewIndexMapping() *mapping.IndexMappingImpl { | |||
return mapping.NewIndexMapping() | |||
} | |||
// NewDocumentMapping returns a new document mapping | |||
// with all the default values. | |||
func NewDocumentMapping() *mapping.DocumentMapping { | |||
return mapping.NewDocumentMapping() | |||
} | |||
// NewDocumentStaticMapping returns a new document | |||
// mapping that will not automatically index parts | |||
// of a document without an explicit mapping. | |||
func NewDocumentStaticMapping() *mapping.DocumentMapping { | |||
return mapping.NewDocumentStaticMapping() | |||
} | |||
// NewDocumentDisabledMapping returns a new document | |||
// mapping that will not perform any indexing. | |||
func NewDocumentDisabledMapping() *mapping.DocumentMapping { | |||
return mapping.NewDocumentDisabledMapping() | |||
} | |||
// NewTextFieldMapping returns a default field mapping for text | |||
func NewTextFieldMapping() *mapping.FieldMapping { | |||
return mapping.NewTextFieldMapping() | |||
} | |||
// NewNumericFieldMapping returns a default field mapping for numbers | |||
func NewNumericFieldMapping() *mapping.FieldMapping { | |||
return mapping.NewNumericFieldMapping() | |||
} | |||
// NewDateTimeFieldMapping returns a default field mapping for dates | |||
func NewDateTimeFieldMapping() *mapping.FieldMapping { | |||
return mapping.NewDateTimeFieldMapping() | |||
} | |||
// NewBooleanFieldMapping returns a default field mapping for booleans | |||
func NewBooleanFieldMapping() *mapping.FieldMapping { | |||
return mapping.NewBooleanFieldMapping() | |||
} |
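These are thin aliases over the mapping package so callers can stay on the top-level bleve import. A sketch composing them (the "issue" type and field names are hypothetical):

```go
package main

import (
	"log"

	"github.com/blevesearch/bleve"
)

func main() {
	issue := bleve.NewDocumentMapping()
	issue.AddFieldMappingsAt("title", bleve.NewTextFieldMapping())
	issue.AddFieldMappingsAt("created", bleve.NewDateTimeFieldMapping())

	m := bleve.NewIndexMapping()
	m.AddDocumentMapping("issue", issue)

	// confirms the default analyzer and date parser can be built
	if err := m.Validate(); err != nil {
		log.Fatal(err)
	}
}
```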
@@ -0,0 +1,99 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package mapping | |||
type customAnalysis struct { | |||
CharFilters map[string]map[string]interface{} `json:"char_filters,omitempty"` | |||
Tokenizers map[string]map[string]interface{} `json:"tokenizers,omitempty"` | |||
TokenMaps map[string]map[string]interface{} `json:"token_maps,omitempty"` | |||
TokenFilters map[string]map[string]interface{} `json:"token_filters,omitempty"` | |||
Analyzers map[string]map[string]interface{} `json:"analyzers,omitempty"` | |||
DateTimeParsers map[string]map[string]interface{} `json:"date_time_parsers,omitempty"` | |||
} | |||
func (c *customAnalysis) registerAll(i *IndexMappingImpl) error { | |||
for name, config := range c.CharFilters { | |||
_, err := i.cache.DefineCharFilter(name, config) | |||
if err != nil { | |||
return err | |||
} | |||
} | |||
if len(c.Tokenizers) > 0 { | |||
// put all the names in a map tracking work to do | |||
todo := map[string]struct{}{} | |||
for name := range c.Tokenizers { | |||
todo[name] = struct{}{} | |||
} | |||
registered := 1 | |||
errs := []error{} | |||
// as long as we keep making progress, keep going | |||
for len(todo) > 0 && registered > 0 { | |||
registered = 0 | |||
errs = []error{} | |||
for name := range todo { | |||
config := c.Tokenizers[name] | |||
_, err := i.cache.DefineTokenizer(name, config) | |||
if err != nil { | |||
errs = append(errs, err) | |||
} else { | |||
delete(todo, name) | |||
registered++ | |||
} | |||
} | |||
} | |||
if len(errs) > 0 { | |||
return errs[0] | |||
} | |||
} | |||
for name, config := range c.TokenMaps { | |||
_, err := i.cache.DefineTokenMap(name, config) | |||
if err != nil { | |||
return err | |||
} | |||
} | |||
for name, config := range c.TokenFilters { | |||
_, err := i.cache.DefineTokenFilter(name, config) | |||
if err != nil { | |||
return err | |||
} | |||
} | |||
for name, config := range c.Analyzers { | |||
_, err := i.cache.DefineAnalyzer(name, config) | |||
if err != nil { | |||
return err | |||
} | |||
} | |||
for name, config := range c.DateTimeParsers { | |||
_, err := i.cache.DefineDateTimeParser(name, config) | |||
if err != nil { | |||
return err | |||
} | |||
} | |||
return nil | |||
} | |||
func newCustomAnalysis() *customAnalysis { | |||
rv := customAnalysis{ | |||
CharFilters: make(map[string]map[string]interface{}), | |||
Tokenizers: make(map[string]map[string]interface{}), | |||
TokenMaps: make(map[string]map[string]interface{}), | |||
TokenFilters: make(map[string]map[string]interface{}), | |||
Analyzers: make(map[string]map[string]interface{}), | |||
DateTimeParsers: make(map[string]map[string]interface{}), | |||
} | |||
return &rv | |||
} |
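registerAll is what lets a mapping loaded from JSON rebuild its custom analysis components, retrying tokenizers until their dependencies resolve. A sketch of feeding it through an index mapping's "analysis" section; the registry names ("custom", "unicode", "to_lower") and import paths are assumptions for this bleve revision:

```go
package main

import (
	"encoding/json"
	"log"

	"github.com/blevesearch/bleve"
	// constructors must be linked in for DefineAnalyzer to resolve them
	_ "github.com/blevesearch/bleve/analysis/analyzer/custom"
	_ "github.com/blevesearch/bleve/analysis/token/lowercase"
	_ "github.com/blevesearch/bleve/analysis/tokenizer/unicode"
)

func main() {
	data := []byte(`{
		"analysis": {
			"analyzers": {
				"lowercased": {
					"type": "custom",
					"tokenizer": "unicode",
					"token_filters": ["to_lower"]
				}
			}
		},
		"default_analyzer": "lowercased"
	}`)
	// UnmarshalJSON decodes the "analysis" section into customAnalysis
	// and finishes by calling registerAll
	m := bleve.NewIndexMapping()
	if err := json.Unmarshal(data, m); err != nil {
		log.Fatal(err)
	}
}
```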
@@ -0,0 +1,490 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package mapping | |||
import ( | |||
"encoding/json" | |||
"fmt" | |||
"reflect" | |||
"time" | |||
"github.com/blevesearch/bleve/registry" | |||
) | |||
// A DocumentMapping describes how a type of document | |||
// should be indexed. | |||
// As documents can be hierarchical, named sub-sections | |||
// of documents are mapped using the same structure in | |||
// the Properties field. | |||
// Each value inside a document can be indexed 0 or more | |||
// ways. These index entries are called fields and | |||
// are stored in the Fields field. | |||
// Entire sections of a document can be ignored or | |||
// excluded by setting Enabled to false. | |||
// If not explicitly mapped, default mapping operations | |||
// are used. To disable this automatic handling, set | |||
// Dynamic to false. | |||
type DocumentMapping struct { | |||
Enabled bool `json:"enabled"` | |||
Dynamic bool `json:"dynamic"` | |||
Properties map[string]*DocumentMapping `json:"properties,omitempty"` | |||
Fields []*FieldMapping `json:"fields,omitempty"` | |||
DefaultAnalyzer string `json:"default_analyzer"` | |||
// StructTagKey overrides "json" when looking for field names in struct tags | |||
StructTagKey string `json:"struct_tag_key,omitempty"` | |||
} | |||
func (dm *DocumentMapping) Validate(cache *registry.Cache) error { | |||
var err error | |||
if dm.DefaultAnalyzer != "" { | |||
_, err := cache.AnalyzerNamed(dm.DefaultAnalyzer) | |||
if err != nil { | |||
return err | |||
} | |||
} | |||
for _, property := range dm.Properties { | |||
err = property.Validate(cache) | |||
if err != nil { | |||
return err | |||
} | |||
} | |||
for _, field := range dm.Fields { | |||
if field.Analyzer != "" { | |||
_, err = cache.AnalyzerNamed(field.Analyzer) | |||
if err != nil { | |||
return err | |||
} | |||
} | |||
if field.DateFormat != "" { | |||
_, err = cache.DateTimeParserNamed(field.DateFormat) | |||
if err != nil { | |||
return err | |||
} | |||
} | |||
switch field.Type { | |||
case "text", "datetime", "number", "boolean": | |||
default: | |||
return fmt.Errorf("unknown field type: '%s'", field.Type) | |||
} | |||
} | |||
return nil | |||
} | |||
// analyzerNameForPath attempts to first find the field | |||
// described by this path, then returns the analyzer | |||
// configured for that field | |||
func (dm *DocumentMapping) analyzerNameForPath(path string) string { | |||
field := dm.fieldDescribedByPath(path) | |||
if field != nil { | |||
return field.Analyzer | |||
} | |||
return "" | |||
} | |||
func (dm *DocumentMapping) fieldDescribedByPath(path string) *FieldMapping { | |||
pathElements := decodePath(path) | |||
if len(pathElements) > 1 { | |||
// easy case, there is more than 1 path element remaining | |||
// the next path element must match a property name | |||
// at this level | |||
for propName, subDocMapping := range dm.Properties { | |||
if propName == pathElements[0] { | |||
return subDocMapping.fieldDescribedByPath(encodePath(pathElements[1:])) | |||
} | |||
} | |||
} else { | |||
// just 1 path element | |||
// first look for property name with empty field | |||
for propName, subDocMapping := range dm.Properties { | |||
if propName == pathElements[0] { | |||
// found property name match, now look at its fields | |||
for _, field := range subDocMapping.Fields { | |||
if field.Name == "" || field.Name == pathElements[0] { | |||
// match | |||
return field | |||
} | |||
} | |||
} | |||
} | |||
// next, walk the properties again, looking for field overriding the name | |||
for propName, subDocMapping := range dm.Properties { | |||
if propName != pathElements[0] { | |||
// property name isn't a match, but field name could override it | |||
for _, field := range subDocMapping.Fields { | |||
if field.Name == pathElements[0] { | |||
return field | |||
} | |||
} | |||
} | |||
} | |||
} | |||
return nil | |||
} | |||
// documentMappingForPath only returns EXACT matches for a sub document | |||
// or for an explicitly mapped field. If you want to find the | |||
// closest document mapping to a field that is not explicitly mapped, | |||
// use closestDocMapping. | |||
func (dm *DocumentMapping) documentMappingForPath(path string) *DocumentMapping { | |||
pathElements := decodePath(path) | |||
current := dm | |||
OUTER: | |||
for i, pathElement := range pathElements { | |||
for name, subDocMapping := range current.Properties { | |||
if name == pathElement { | |||
current = subDocMapping | |||
continue OUTER | |||
} | |||
} | |||
// no subDocMapping matches this pathElement | |||
// only if this is the last element check for field name | |||
if i == len(pathElements)-1 { | |||
for _, field := range current.Fields { | |||
if field.Name == pathElement { | |||
break | |||
} | |||
} | |||
} | |||
return nil | |||
} | |||
return current | |||
} | |||
// closestDocMapping finds the most specific document mapping that matches | |||
// part of the provided path | |||
func (dm *DocumentMapping) closestDocMapping(path string) *DocumentMapping { | |||
pathElements := decodePath(path) | |||
current := dm | |||
OUTER: | |||
for _, pathElement := range pathElements { | |||
for name, subDocMapping := range current.Properties { | |||
if name == pathElement { | |||
current = subDocMapping | |||
continue OUTER | |||
} | |||
} | |||
} | |||
return current | |||
} | |||
// NewDocumentMapping returns a new document mapping | |||
// with all the default values. | |||
func NewDocumentMapping() *DocumentMapping { | |||
return &DocumentMapping{ | |||
Enabled: true, | |||
Dynamic: true, | |||
} | |||
} | |||
// NewDocumentStaticMapping returns a new document | |||
// mapping that will not automatically index parts | |||
// of a document without an explicit mapping. | |||
func NewDocumentStaticMapping() *DocumentMapping { | |||
return &DocumentMapping{ | |||
Enabled: true, | |||
} | |||
} | |||
// NewDocumentDisabledMapping returns a new document | |||
// mapping that will not perform any indexing. | |||
func NewDocumentDisabledMapping() *DocumentMapping { | |||
return &DocumentMapping{} | |||
} | |||
// AddSubDocumentMapping adds the provided DocumentMapping as a sub-mapping | |||
// for the specified named subsection. | |||
func (dm *DocumentMapping) AddSubDocumentMapping(property string, sdm *DocumentMapping) { | |||
if dm.Properties == nil { | |||
dm.Properties = make(map[string]*DocumentMapping) | |||
} | |||
dm.Properties[property] = sdm | |||
} | |||
// AddFieldMappingsAt adds one or more FieldMappings | |||
// at the named sub-document. If the named sub-document | |||
// doesn't yet exist it is created for you. | |||
// This is a convenience function to make most common | |||
// mappings more concise. | |||
// Otherwise, you would: | |||
// subMapping := NewDocumentMapping() | |||
// subMapping.AddFieldMapping(fieldMapping) | |||
// parentMapping.AddSubDocumentMapping(property, subMapping) | |||
func (dm *DocumentMapping) AddFieldMappingsAt(property string, fms ...*FieldMapping) { | |||
if dm.Properties == nil { | |||
dm.Properties = make(map[string]*DocumentMapping) | |||
} | |||
sdm, ok := dm.Properties[property] | |||
if !ok { | |||
sdm = NewDocumentMapping() | |||
} | |||
for _, fm := range fms { | |||
sdm.AddFieldMapping(fm) | |||
} | |||
dm.Properties[property] = sdm | |||
} | |||
// AddFieldMapping adds the provided FieldMapping for this section | |||
// of the document. | |||
func (dm *DocumentMapping) AddFieldMapping(fm *FieldMapping) { | |||
if dm.Fields == nil { | |||
dm.Fields = make([]*FieldMapping, 0) | |||
} | |||
dm.Fields = append(dm.Fields, fm) | |||
} | |||
// UnmarshalJSON offers custom unmarshaling with optional strict validation | |||
func (dm *DocumentMapping) UnmarshalJSON(data []byte) error { | |||
var tmp map[string]json.RawMessage | |||
err := json.Unmarshal(data, &tmp) | |||
if err != nil { | |||
return err | |||
} | |||
// set defaults for fields which might have been omitted | |||
dm.Enabled = true | |||
dm.Dynamic = true | |||
var invalidKeys []string | |||
for k, v := range tmp { | |||
switch k { | |||
case "enabled": | |||
err := json.Unmarshal(v, &dm.Enabled) | |||
if err != nil { | |||
return err | |||
} | |||
case "dynamic": | |||
err := json.Unmarshal(v, &dm.Dynamic) | |||
if err != nil { | |||
return err | |||
} | |||
case "default_analyzer": | |||
err := json.Unmarshal(v, &dm.DefaultAnalyzer) | |||
if err != nil { | |||
return err | |||
} | |||
case "properties": | |||
err := json.Unmarshal(v, &dm.Properties) | |||
if err != nil { | |||
return err | |||
} | |||
case "fields": | |||
err := json.Unmarshal(v, &dm.Fields) | |||
if err != nil { | |||
return err | |||
} | |||
case "struct_tag_key": | |||
err := json.Unmarshal(v, &dm.StructTagKey) | |||
if err != nil { | |||
return err | |||
} | |||
default: | |||
invalidKeys = append(invalidKeys, k) | |||
} | |||
} | |||
if MappingJSONStrict && len(invalidKeys) > 0 { | |||
return fmt.Errorf("document mapping contains invalid keys: %v", invalidKeys) | |||
} | |||
return nil | |||
} | |||
func (dm *DocumentMapping) defaultAnalyzerName(path []string) string { | |||
rv := "" | |||
current := dm | |||
for _, pathElement := range path { | |||
var ok bool | |||
current, ok = current.Properties[pathElement] | |||
if !ok { | |||
break | |||
} | |||
if current.DefaultAnalyzer != "" { | |||
rv = current.DefaultAnalyzer | |||
} | |||
} | |||
return rv | |||
} | |||
func (dm *DocumentMapping) walkDocument(data interface{}, path []string, indexes []uint64, context *walkContext) { | |||
// allow the default "json" tag to be overridden | |||
structTagKey := dm.StructTagKey | |||
if structTagKey == "" { | |||
structTagKey = "json" | |||
} | |||
val := reflect.ValueOf(data) | |||
typ := val.Type() | |||
switch typ.Kind() { | |||
case reflect.Map: | |||
// FIXME can add support for other map keys in the future | |||
if typ.Key().Kind() == reflect.String { | |||
for _, key := range val.MapKeys() { | |||
fieldName := key.String() | |||
fieldVal := val.MapIndex(key).Interface() | |||
dm.processProperty(fieldVal, append(path, fieldName), indexes, context) | |||
} | |||
} | |||
case reflect.Struct: | |||
for i := 0; i < val.NumField(); i++ { | |||
field := typ.Field(i) | |||
fieldName := field.Name | |||
// anonymous fields of type struct can elide the type name | |||
if field.Anonymous && field.Type.Kind() == reflect.Struct { | |||
fieldName = "" | |||
} | |||
// if the field has a name under the specified tag, prefer that | |||
tag := field.Tag.Get(structTagKey) | |||
tagFieldName := parseTagName(tag) | |||
if tagFieldName == "-" { | |||
continue | |||
} | |||
// allow tag to set field name to empty, only if anonymous | |||
if field.Tag != "" && (tagFieldName != "" || field.Anonymous) { | |||
fieldName = tagFieldName | |||
} | |||
if val.Field(i).CanInterface() { | |||
fieldVal := val.Field(i).Interface() | |||
newpath := path | |||
if fieldName != "" { | |||
newpath = append(path, fieldName) | |||
} | |||
dm.processProperty(fieldVal, newpath, indexes, context) | |||
} | |||
} | |||
case reflect.Slice, reflect.Array: | |||
for i := 0; i < val.Len(); i++ { | |||
if val.Index(i).CanInterface() { | |||
fieldVal := val.Index(i).Interface() | |||
dm.processProperty(fieldVal, path, append(indexes, uint64(i)), context) | |||
} | |||
} | |||
case reflect.Ptr: | |||
ptrElem := val.Elem() | |||
if ptrElem.IsValid() && ptrElem.CanInterface() { | |||
dm.processProperty(ptrElem.Interface(), path, indexes, context) | |||
} | |||
case reflect.String: | |||
dm.processProperty(val.String(), path, indexes, context) | |||
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: | |||
dm.processProperty(float64(val.Int()), path, indexes, context) | |||
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: | |||
dm.processProperty(float64(val.Uint()), path, indexes, context) | |||
case reflect.Float32, reflect.Float64: | |||
dm.processProperty(float64(val.Float()), path, indexes, context) | |||
case reflect.Bool: | |||
dm.processProperty(val.Bool(), path, indexes, context) | |||
} | |||
} | |||
func (dm *DocumentMapping) processProperty(property interface{}, path []string, indexes []uint64, context *walkContext) { | |||
pathString := encodePath(path) | |||
// look to see if there is a mapping for this field | |||
subDocMapping := dm.documentMappingForPath(pathString) | |||
closestDocMapping := dm.closestDocMapping(pathString) | |||
// check to see if we even need to do further processing | |||
if subDocMapping != nil && !subDocMapping.Enabled { | |||
return | |||
} | |||
propertyValue := reflect.ValueOf(property) | |||
if !propertyValue.IsValid() { | |||
// cannot do anything with the zero value | |||
return | |||
} | |||
propertyType := propertyValue.Type() | |||
switch propertyType.Kind() { | |||
case reflect.String: | |||
propertyValueString := propertyValue.String() | |||
if subDocMapping != nil { | |||
// index by explicit mapping | |||
for _, fieldMapping := range subDocMapping.Fields { | |||
fieldMapping.processString(propertyValueString, pathString, path, indexes, context) | |||
} | |||
} else if closestDocMapping.Dynamic { | |||
// automatic indexing behavior | |||
// first see if it can be parsed by the default date parser | |||
dateTimeParser := context.im.DateTimeParserNamed(context.im.DefaultDateTimeParser) | |||
if dateTimeParser != nil { | |||
parsedDateTime, err := dateTimeParser.ParseDateTime(propertyValueString) | |||
if err != nil { | |||
// index as text | |||
fieldMapping := newTextFieldMappingDynamic(context.im) | |||
fieldMapping.processString(propertyValueString, pathString, path, indexes, context) | |||
} else { | |||
// index as datetime | |||
fieldMapping := newDateTimeFieldMappingDynamic(context.im) | |||
fieldMapping.processTime(parsedDateTime, pathString, path, indexes, context) | |||
} | |||
} | |||
} | |||
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: | |||
dm.processProperty(float64(propertyValue.Int()), path, indexes, context) | |||
return | |||
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: | |||
dm.processProperty(float64(propertyValue.Uint()), path, indexes, context) | |||
return | |||
case reflect.Float64, reflect.Float32: | |||
propertyValFloat := propertyValue.Float() | |||
if subDocMapping != nil { | |||
// index by explicit mapping | |||
for _, fieldMapping := range subDocMapping.Fields { | |||
fieldMapping.processFloat64(propertyValFloat, pathString, path, indexes, context) | |||
} | |||
} else if closestDocMapping.Dynamic { | |||
// automatic indexing behavior | |||
fieldMapping := newNumericFieldMappingDynamic(context.im) | |||
fieldMapping.processFloat64(propertyValFloat, pathString, path, indexes, context) | |||
} | |||
case reflect.Bool: | |||
propertyValBool := propertyValue.Bool() | |||
if subDocMapping != nil { | |||
// index by explicit mapping | |||
for _, fieldMapping := range subDocMapping.Fields { | |||
fieldMapping.processBoolean(propertyValBool, pathString, path, indexes, context) | |||
} | |||
} else if closestDocMapping.Dynamic { | |||
// automatic indexing behavior | |||
fieldMapping := newBooleanFieldMappingDynamic(context.im) | |||
fieldMapping.processBoolean(propertyValBool, pathString, path, indexes, context) | |||
} | |||
case reflect.Struct: | |||
switch property := property.(type) { | |||
case time.Time: | |||
// don't descend into the time struct | |||
if subDocMapping != nil { | |||
// index by explicit mapping | |||
for _, fieldMapping := range subDocMapping.Fields { | |||
fieldMapping.processTime(property, pathString, path, indexes, context) | |||
} | |||
} else if closestDocMapping.Dynamic { | |||
fieldMapping := newDateTimeFieldMappingDynamic(context.im) | |||
fieldMapping.processTime(property, pathString, path, indexes, context) | |||
} | |||
default: | |||
dm.walkDocument(property, path, indexes, context) | |||
} | |||
default: | |||
dm.walkDocument(property, path, indexes, context) | |||
} | |||
} |
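walkDocument reflects over maps, structs, slices, and pointers, while processProperty decides per leaf whether an explicit field mapping applies or dynamic typing kicks in. A sketch of the resolution this enables (type, field, and analyzer names are hypothetical):

```go
package main

import (
	"fmt"

	"github.com/blevesearch/bleve"
)

// Comment is what walkDocument would traverse: it follows the "json"
// struct tags (or StructTagKey) and skips fields tagged "-"
type Comment struct {
	Body   string `json:"body"`
	Secret string `json:"-"` // never walked or indexed
}

func main() {
	body := bleve.NewTextFieldMapping()
	body.Analyzer = "keyword" // hypothetical analyzer name

	comment := bleve.NewDocumentMapping()
	comment.AddFieldMappingsAt("body", body)

	m := bleve.NewIndexMapping()
	m.AddDocumentMapping("comment", comment)

	// fieldDescribedByPath finds the explicit mapping for "body"
	fmt.Println(m.AnalyzerNameForPath("body")) // "keyword"
}
```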
@@ -0,0 +1,296 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package mapping | |||
import ( | |||
"encoding/json" | |||
"fmt" | |||
"time" | |||
"github.com/blevesearch/bleve/analysis" | |||
"github.com/blevesearch/bleve/document" | |||
) | |||
// control the default behavior for dynamic fields (those not explicitly mapped) | |||
var ( | |||
IndexDynamic = true | |||
StoreDynamic = true | |||
) | |||
// A FieldMapping describes how a specific item | |||
// should be put into the index. | |||
type FieldMapping struct { | |||
Name string `json:"name,omitempty"` | |||
Type string `json:"type,omitempty"` | |||
// Analyzer specifies the name of the analyzer to use for this field. If | |||
// Analyzer is empty, traverse the DocumentMapping tree toward the root and | |||
// pick the first non-empty DefaultAnalyzer found. If there is none, use | |||
// the IndexMapping.DefaultAnalyzer. | |||
Analyzer string `json:"analyzer,omitempty"` | |||
// Store indicates whether to store field values in the index. Stored | |||
// values can be retrieved from search results using SearchRequest.Fields. | |||
Store bool `json:"store,omitempty"` | |||
Index bool `json:"index,omitempty"` | |||
// IncludeTermVectors, if true, causes term occurrences to be recorded for | |||
// this field. This includes the term position within the terms sequence and | |||
// the term offsets in the source document field. Term vectors are required | |||
// to perform phrase queries or term highlighting in source documents. | |||
IncludeTermVectors bool `json:"include_term_vectors,omitempty"` | |||
IncludeInAll bool `json:"include_in_all,omitempty"` | |||
DateFormat string `json:"date_format,omitempty"` | |||
} | |||
// NewTextFieldMapping returns a default field mapping for text | |||
func NewTextFieldMapping() *FieldMapping { | |||
return &FieldMapping{ | |||
Type: "text", | |||
Store: true, | |||
Index: true, | |||
IncludeTermVectors: true, | |||
IncludeInAll: true, | |||
} | |||
} | |||
func newTextFieldMappingDynamic(im *IndexMappingImpl) *FieldMapping { | |||
rv := NewTextFieldMapping() | |||
rv.Store = im.StoreDynamic | |||
rv.Index = im.IndexDynamic | |||
return rv | |||
} | |||
// NewNumericFieldMapping returns a default field mapping for numbers | |||
func NewNumericFieldMapping() *FieldMapping { | |||
return &FieldMapping{ | |||
Type: "number", | |||
Store: true, | |||
Index: true, | |||
IncludeInAll: true, | |||
} | |||
} | |||
func newNumericFieldMappingDynamic(im *IndexMappingImpl) *FieldMapping { | |||
rv := NewNumericFieldMapping() | |||
rv.Store = im.StoreDynamic | |||
rv.Index = im.IndexDynamic | |||
return rv | |||
} | |||
// NewDateTimeFieldMapping returns a default field mapping for dates | |||
func NewDateTimeFieldMapping() *FieldMapping { | |||
return &FieldMapping{ | |||
Type: "datetime", | |||
Store: true, | |||
Index: true, | |||
IncludeInAll: true, | |||
} | |||
} | |||
func newDateTimeFieldMappingDynamic(im *IndexMappingImpl) *FieldMapping { | |||
rv := NewDateTimeFieldMapping() | |||
rv.Store = im.StoreDynamic | |||
rv.Index = im.IndexDynamic | |||
return rv | |||
} | |||
// NewBooleanFieldMapping returns a default field mapping for booleans | |||
func NewBooleanFieldMapping() *FieldMapping { | |||
return &FieldMapping{ | |||
Type: "boolean", | |||
Store: true, | |||
Index: true, | |||
IncludeInAll: true, | |||
} | |||
} | |||
func newBooleanFieldMappingDynamic(im *IndexMappingImpl) *FieldMapping { | |||
rv := NewBooleanFieldMapping() | |||
rv.Store = im.StoreDynamic | |||
rv.Index = im.IndexDynamic | |||
return rv | |||
} | |||
// Options returns the indexing options for this field. | |||
func (fm *FieldMapping) Options() document.IndexingOptions { | |||
var rv document.IndexingOptions | |||
if fm.Store { | |||
rv |= document.StoreField | |||
} | |||
if fm.Index { | |||
rv |= document.IndexField | |||
} | |||
if fm.IncludeTermVectors { | |||
rv |= document.IncludeTermVectors | |||
} | |||
return rv | |||
} | |||
func (fm *FieldMapping) processString(propertyValueString string, pathString string, path []string, indexes []uint64, context *walkContext) { | |||
fieldName := getFieldName(pathString, path, fm) | |||
options := fm.Options() | |||
if fm.Type == "text" { | |||
analyzer := fm.analyzerForField(path, context) | |||
field := document.NewTextFieldCustom(fieldName, indexes, []byte(propertyValueString), options, analyzer) | |||
context.doc.AddField(field) | |||
if !fm.IncludeInAll { | |||
context.excludedFromAll = append(context.excludedFromAll, fieldName) | |||
} | |||
} else if fm.Type == "datetime" { | |||
dateTimeFormat := context.im.DefaultDateTimeParser | |||
if fm.DateFormat != "" { | |||
dateTimeFormat = fm.DateFormat | |||
} | |||
dateTimeParser := context.im.DateTimeParserNamed(dateTimeFormat) | |||
if dateTimeParser != nil { | |||
parsedDateTime, err := dateTimeParser.ParseDateTime(propertyValueString) | |||
if err == nil { | |||
fm.processTime(parsedDateTime, pathString, path, indexes, context) | |||
} | |||
} | |||
} | |||
} | |||
func (fm *FieldMapping) processFloat64(propertyValFloat float64, pathString string, path []string, indexes []uint64, context *walkContext) { | |||
fieldName := getFieldName(pathString, path, fm) | |||
if fm.Type == "number" { | |||
options := fm.Options() | |||
field := document.NewNumericFieldWithIndexingOptions(fieldName, indexes, propertyValFloat, options) | |||
context.doc.AddField(field) | |||
if !fm.IncludeInAll { | |||
context.excludedFromAll = append(context.excludedFromAll, fieldName) | |||
} | |||
} | |||
} | |||
func (fm *FieldMapping) processTime(propertyValueTime time.Time, pathString string, path []string, indexes []uint64, context *walkContext) { | |||
fieldName := getFieldName(pathString, path, fm) | |||
if fm.Type == "datetime" { | |||
options := fm.Options() | |||
field, err := document.NewDateTimeFieldWithIndexingOptions(fieldName, indexes, propertyValueTime, options) | |||
if err == nil { | |||
context.doc.AddField(field) | |||
} else { | |||
logger.Printf("could not build date %v", err) | |||
} | |||
if !fm.IncludeInAll { | |||
context.excludedFromAll = append(context.excludedFromAll, fieldName) | |||
} | |||
} | |||
} | |||
func (fm *FieldMapping) processBoolean(propertyValueBool bool, pathString string, path []string, indexes []uint64, context *walkContext) { | |||
fieldName := getFieldName(pathString, path, fm) | |||
if fm.Type == "boolean" { | |||
options := fm.Options() | |||
field := document.NewBooleanFieldWithIndexingOptions(fieldName, indexes, propertyValueBool, options) | |||
context.doc.AddField(field) | |||
if !fm.IncludeInAll { | |||
context.excludedFromAll = append(context.excludedFromAll, fieldName) | |||
} | |||
} | |||
} | |||
func (fm *FieldMapping) analyzerForField(path []string, context *walkContext) *analysis.Analyzer { | |||
analyzerName := fm.Analyzer | |||
if analyzerName == "" { | |||
analyzerName = context.dm.defaultAnalyzerName(path) | |||
if analyzerName == "" { | |||
analyzerName = context.im.DefaultAnalyzer | |||
} | |||
} | |||
return context.im.AnalyzerNamed(analyzerName) | |||
} | |||
func getFieldName(pathString string, path []string, fieldMapping *FieldMapping) string { | |||
fieldName := pathString | |||
if fieldMapping.Name != "" { | |||
parentName := "" | |||
if len(path) > 1 { | |||
parentName = encodePath(path[:len(path)-1]) + pathSeparator | |||
} | |||
fieldName = parentName + fieldMapping.Name | |||
} | |||
return fieldName | |||
} | |||
// UnmarshalJSON offers custom unmarshaling with optional strict validation | |||
func (fm *FieldMapping) UnmarshalJSON(data []byte) error { | |||
var tmp map[string]json.RawMessage | |||
err := json.Unmarshal(data, &tmp) | |||
if err != nil { | |||
return err | |||
} | |||
var invalidKeys []string | |||
for k, v := range tmp { | |||
switch k { | |||
case "name": | |||
err := json.Unmarshal(v, &fm.Name) | |||
if err != nil { | |||
return err | |||
} | |||
case "type": | |||
err := json.Unmarshal(v, &fm.Type) | |||
if err != nil { | |||
return err | |||
} | |||
case "analyzer": | |||
err := json.Unmarshal(v, &fm.Analyzer) | |||
if err != nil { | |||
return err | |||
} | |||
case "store": | |||
err := json.Unmarshal(v, &fm.Store) | |||
if err != nil { | |||
return err | |||
} | |||
case "index": | |||
err := json.Unmarshal(v, &fm.Index) | |||
if err != nil { | |||
return err | |||
} | |||
case "include_term_vectors": | |||
err := json.Unmarshal(v, &fm.IncludeTermVectors) | |||
if err != nil { | |||
return err | |||
} | |||
case "include_in_all": | |||
err := json.Unmarshal(v, &fm.IncludeInAll) | |||
if err != nil { | |||
return err | |||
} | |||
case "date_format": | |||
err := json.Unmarshal(v, &fm.DateFormat) | |||
if err != nil { | |||
return err | |||
} | |||
default: | |||
invalidKeys = append(invalidKeys, k) | |||
} | |||
} | |||
if MappingJSONStrict && len(invalidKeys) > 0 { | |||
return fmt.Errorf("field mapping contains invalid keys: %v", invalidKeys) | |||
} | |||
return nil | |||
} |
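The custom unmarshaler collects unknown keys so that, with MappingJSONStrict enabled, typos in mapping JSON fail loudly instead of being silently dropped. A short sketch:

```go
package main

import (
	"encoding/json"
	"fmt"

	"github.com/blevesearch/bleve/mapping"
)

func main() {
	// opt in to strict decoding; by default unknown keys are ignored
	mapping.MappingJSONStrict = true

	var fm mapping.FieldMapping
	err := json.Unmarshal([]byte(`{"type":"text","stored":true}`), &fm)
	fmt.Println(err) // field mapping contains invalid keys: [stored]
}
```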
@@ -0,0 +1,430 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package mapping | |||
import ( | |||
"encoding/json" | |||
"fmt" | |||
"github.com/blevesearch/bleve/analysis" | |||
"github.com/blevesearch/bleve/analysis/analyzer/standard" | |||
"github.com/blevesearch/bleve/analysis/datetime/optional" | |||
"github.com/blevesearch/bleve/document" | |||
"github.com/blevesearch/bleve/registry" | |||
) | |||
var MappingJSONStrict = false | |||
const defaultTypeField = "_type" | |||
const defaultType = "_default" | |||
const defaultField = "_all" | |||
const defaultAnalyzer = standard.Name | |||
const defaultDateTimeParser = optional.Name | |||
// An IndexMappingImpl controls how objects are placed | |||
// into an index. | |||
// First the type of the object is determined. | |||
// Once the type is known, the appropriate | |||
// DocumentMapping is selected by the type. | |||
// If no mapping was determined for that type, | |||
// a DefaultMapping will be used. | |||
type IndexMappingImpl struct { | |||
TypeMapping map[string]*DocumentMapping `json:"types,omitempty"` | |||
DefaultMapping *DocumentMapping `json:"default_mapping"` | |||
TypeField string `json:"type_field"` | |||
DefaultType string `json:"default_type"` | |||
DefaultAnalyzer string `json:"default_analyzer"` | |||
DefaultDateTimeParser string `json:"default_datetime_parser"` | |||
DefaultField string `json:"default_field"` | |||
StoreDynamic bool `json:"store_dynamic"` | |||
IndexDynamic bool `json:"index_dynamic"` | |||
CustomAnalysis *customAnalysis `json:"analysis,omitempty"` | |||
cache *registry.Cache | |||
} | |||
// AddCustomCharFilter defines a custom char filter for use in this mapping | |||
func (im *IndexMappingImpl) AddCustomCharFilter(name string, config map[string]interface{}) error { | |||
_, err := im.cache.DefineCharFilter(name, config) | |||
if err != nil { | |||
return err | |||
} | |||
im.CustomAnalysis.CharFilters[name] = config | |||
return nil | |||
} | |||
// AddCustomTokenizer defines a custom tokenizer for use in this mapping | |||
func (im *IndexMappingImpl) AddCustomTokenizer(name string, config map[string]interface{}) error { | |||
_, err := im.cache.DefineTokenizer(name, config) | |||
if err != nil { | |||
return err | |||
} | |||
im.CustomAnalysis.Tokenizers[name] = config | |||
return nil | |||
} | |||
// AddCustomTokenMap defines a custom token map for use in this mapping | |||
func (im *IndexMappingImpl) AddCustomTokenMap(name string, config map[string]interface{}) error { | |||
_, err := im.cache.DefineTokenMap(name, config) | |||
if err != nil { | |||
return err | |||
} | |||
im.CustomAnalysis.TokenMaps[name] = config | |||
return nil | |||
} | |||
// AddCustomTokenFilter defines a custom token filter for use in this mapping | |||
func (im *IndexMappingImpl) AddCustomTokenFilter(name string, config map[string]interface{}) error { | |||
_, err := im.cache.DefineTokenFilter(name, config) | |||
if err != nil { | |||
return err | |||
} | |||
im.CustomAnalysis.TokenFilters[name] = config | |||
return nil | |||
} | |||
// AddCustomAnalyzer defines a custom analyzer for use in this mapping. The | |||
// config map must have a "type" string entry to resolve the analyzer | |||
// constructor. The constructor is invoked with the remaining entries and the | |||
// returned analyzer is registered in the IndexMapping. | |||
// | |||
// bleve comes with predefined analyzers, like | |||
// github.com/blevesearch/bleve/analysis/analyzers/custom_analyzer. They are | |||
// available only if their package is imported by client code. To achieve this, | |||
// use their metadata to fill configuration entries: | |||
// | |||
// import ( | |||
// "github.com/blevesearch/bleve/analysis/analyzers/custom_analyzer" | |||
// "github.com/blevesearch/bleve/analysis/char_filters/html_char_filter" | |||
// "github.com/blevesearch/bleve/analysis/token_filters/lower_case_filter" | |||
// "github.com/blevesearch/bleve/analysis/tokenizers/unicode" | |||
// ) | |||
// | |||
// m := bleve.NewIndexMapping() | |||
// err := m.AddCustomAnalyzer("html", map[string]interface{}{ | |||
// "type": custom_analyzer.Name, | |||
// "char_filters": []string{ | |||
// html_char_filter.Name, | |||
// }, | |||
// "tokenizer": unicode.Name, | |||
// "token_filters": []string{ | |||
// lower_case_filter.Name, | |||
// ... | |||
// }, | |||
// }) | |||
func (im *IndexMappingImpl) AddCustomAnalyzer(name string, config map[string]interface{}) error { | |||
_, err := im.cache.DefineAnalyzer(name, config) | |||
if err != nil { | |||
return err | |||
} | |||
im.CustomAnalysis.Analyzers[name] = config | |||
return nil | |||
} | |||
// AddCustomDateTimeParser defines a custom date time parser for use in this mapping | |||
func (im *IndexMappingImpl) AddCustomDateTimeParser(name string, config map[string]interface{}) error { | |||
_, err := im.cache.DefineDateTimeParser(name, config) | |||
if err != nil { | |||
return err | |||
} | |||
im.CustomAnalysis.DateTimeParsers[name] = config | |||
return nil | |||
} | |||
// NewIndexMapping creates a new IndexMapping that will use all the default indexing rules | |||
func NewIndexMapping() *IndexMappingImpl { | |||
return &IndexMappingImpl{ | |||
TypeMapping: make(map[string]*DocumentMapping), | |||
DefaultMapping: NewDocumentMapping(), | |||
TypeField: defaultTypeField, | |||
DefaultType: defaultType, | |||
DefaultAnalyzer: defaultAnalyzer, | |||
DefaultDateTimeParser: defaultDateTimeParser, | |||
DefaultField: defaultField, | |||
IndexDynamic: IndexDynamic, | |||
StoreDynamic: StoreDynamic, | |||
CustomAnalysis: newCustomAnalysis(), | |||
cache: registry.NewCache(), | |||
} | |||
} | |||
// Validate will walk the entire structure, ensuring that all | |||
// explicitly named and default analyzers and date time parsers can be built | |||
func (im *IndexMappingImpl) Validate() error { | |||
_, err := im.cache.AnalyzerNamed(im.DefaultAnalyzer) | |||
if err != nil { | |||
return err | |||
} | |||
_, err = im.cache.DateTimeParserNamed(im.DefaultDateTimeParser) | |||
if err != nil { | |||
return err | |||
} | |||
err = im.DefaultMapping.Validate(im.cache) | |||
if err != nil { | |||
return err | |||
} | |||
for _, docMapping := range im.TypeMapping { | |||
err = docMapping.Validate(im.cache) | |||
if err != nil { | |||
return err | |||
} | |||
} | |||
return nil | |||
} | |||
// AddDocumentMapping sets a custom document mapping for the specified type | |||
func (im *IndexMappingImpl) AddDocumentMapping(doctype string, dm *DocumentMapping) { | |||
im.TypeMapping[doctype] = dm | |||
} | |||
func (im *IndexMappingImpl) mappingForType(docType string) *DocumentMapping { | |||
docMapping := im.TypeMapping[docType] | |||
if docMapping == nil { | |||
docMapping = im.DefaultMapping | |||
} | |||
return docMapping | |||
} | |||
// UnmarshalJSON offers custom unmarshaling with optional strict validation | |||
func (im *IndexMappingImpl) UnmarshalJSON(data []byte) error { | |||
var tmp map[string]json.RawMessage | |||
err := json.Unmarshal(data, &tmp) | |||
if err != nil { | |||
return err | |||
} | |||
// set defaults for fields which might have been omitted | |||
im.cache = registry.NewCache() | |||
im.CustomAnalysis = newCustomAnalysis() | |||
im.TypeField = defaultTypeField | |||
im.DefaultType = defaultType | |||
im.DefaultAnalyzer = defaultAnalyzer | |||
im.DefaultDateTimeParser = defaultDateTimeParser | |||
im.DefaultField = defaultField | |||
im.DefaultMapping = NewDocumentMapping() | |||
im.TypeMapping = make(map[string]*DocumentMapping) | |||
im.StoreDynamic = StoreDynamic | |||
im.IndexDynamic = IndexDynamic | |||
var invalidKeys []string | |||
for k, v := range tmp { | |||
switch k { | |||
case "analysis": | |||
err := json.Unmarshal(v, &im.CustomAnalysis) | |||
if err != nil { | |||
return err | |||
} | |||
case "type_field": | |||
err := json.Unmarshal(v, &im.TypeField) | |||
if err != nil { | |||
return err | |||
} | |||
case "default_type": | |||
err := json.Unmarshal(v, &im.DefaultType) | |||
if err != nil { | |||
return err | |||
} | |||
case "default_analyzer": | |||
err := json.Unmarshal(v, &im.DefaultAnalyzer) | |||
if err != nil { | |||
return err | |||
} | |||
case "default_datetime_parser": | |||
err := json.Unmarshal(v, &im.DefaultDateTimeParser) | |||
if err != nil { | |||
return err | |||
} | |||
case "default_field": | |||
err := json.Unmarshal(v, &im.DefaultField) | |||
if err != nil { | |||
return err | |||
} | |||
case "default_mapping": | |||
err := json.Unmarshal(v, &im.DefaultMapping) | |||
if err != nil { | |||
return err | |||
} | |||
case "types": | |||
err := json.Unmarshal(v, &im.TypeMapping) | |||
if err != nil { | |||
return err | |||
} | |||
case "store_dynamic": | |||
err := json.Unmarshal(v, &im.StoreDynamic) | |||
if err != nil { | |||
return err | |||
} | |||
case "index_dynamic": | |||
err := json.Unmarshal(v, &im.IndexDynamic) | |||
if err != nil { | |||
return err | |||
} | |||
default: | |||
invalidKeys = append(invalidKeys, k) | |||
} | |||
} | |||
if MappingJSONStrict && len(invalidKeys) > 0 { | |||
return fmt.Errorf("index mapping contains invalid keys: %v", invalidKeys) | |||
} | |||
err = im.CustomAnalysis.registerAll(im) | |||
if err != nil { | |||
return err | |||
} | |||
return nil | |||
} | |||
func (im *IndexMappingImpl) determineType(data interface{}) string { | |||
// first see if the object implements Classifier | |||
classifier, ok := data.(Classifier) | |||
if ok { | |||
return classifier.Type() | |||
} | |||
// now see if we can find a type using the mapping | |||
typ, ok := mustString(lookupPropertyPath(data, im.TypeField)) | |||
if ok { | |||
return typ | |||
} | |||
return im.DefaultType | |||
} | |||
func (im *IndexMappingImpl) MapDocument(doc *document.Document, data interface{}) error { | |||
docType := im.determineType(data) | |||
docMapping := im.mappingForType(docType) | |||
walkContext := im.newWalkContext(doc, docMapping) | |||
if docMapping.Enabled { | |||
docMapping.walkDocument(data, []string{}, []uint64{}, walkContext) | |||
// see if the _all field was disabled | |||
allMapping := docMapping.documentMappingForPath("_all") | |||
if allMapping == nil || allMapping.Enabled { | |||
field := document.NewCompositeFieldWithIndexingOptions("_all", true, []string{}, walkContext.excludedFromAll, document.IndexField|document.IncludeTermVectors) | |||
doc.AddField(field) | |||
} | |||
} | |||
return nil | |||
} | |||
type walkContext struct { | |||
doc *document.Document | |||
im *IndexMappingImpl | |||
dm *DocumentMapping | |||
excludedFromAll []string | |||
} | |||
func (im *IndexMappingImpl) newWalkContext(doc *document.Document, dm *DocumentMapping) *walkContext { | |||
return &walkContext{ | |||
doc: doc, | |||
im: im, | |||
dm: dm, | |||
excludedFromAll: []string{}, | |||
} | |||
} | |||
// AnalyzerNameForPath attempts to find the best analyzer to use given only a | |||
// field name. It walks all the document types, looking for field mappings at | |||
// the provided path; if one exists and it has an explicit analyzer, that | |||
// analyzer name is returned. | |||
func (im *IndexMappingImpl) AnalyzerNameForPath(path string) string { | |||
// first we look for explicit mapping on the field | |||
for _, docMapping := range im.TypeMapping { | |||
analyzerName := docMapping.analyzerNameForPath(path) | |||
if analyzerName != "" { | |||
return analyzerName | |||
} | |||
} | |||
// now try the default mapping | |||
pathMapping := im.DefaultMapping.documentMappingForPath(path) | |||
if pathMapping != nil { | |||
if len(pathMapping.Fields) > 0 { | |||
if pathMapping.Fields[0].Analyzer != "" { | |||
return pathMapping.Fields[0].Analyzer | |||
} | |||
} | |||
} | |||
// next we will try default analyzers for the path | |||
pathDecoded := decodePath(path) | |||
for _, docMapping := range im.TypeMapping { | |||
rv := docMapping.defaultAnalyzerName(pathDecoded) | |||
if rv != "" { | |||
return rv | |||
} | |||
} | |||
return im.DefaultAnalyzer | |||
} | |||
func (im *IndexMappingImpl) AnalyzerNamed(name string) *analysis.Analyzer { | |||
analyzer, err := im.cache.AnalyzerNamed(name) | |||
if err != nil { | |||
logger.Printf("error using analyzer named: %s", name) | |||
return nil | |||
} | |||
return analyzer | |||
} | |||
func (im *IndexMappingImpl) DateTimeParserNamed(name string) analysis.DateTimeParser { | |||
if name == "" { | |||
name = im.DefaultDateTimeParser | |||
} | |||
dateTimeParser, err := im.cache.DateTimeParserNamed(name) | |||
if err != nil { | |||
logger.Printf("error using datetime parser named: %s", name) | |||
return nil | |||
} | |||
return dateTimeParser | |||
} | |||
func (im *IndexMappingImpl) datetimeParserNameForPath(path string) string { | |||
// first we look for explicit mapping on the field | |||
for _, docMapping := range im.TypeMapping { | |||
pathMapping := docMapping.documentMappingForPath(path) | |||
if pathMapping != nil { | |||
if len(pathMapping.Fields) > 0 { | |||
// note: resolve the parser from DateFormat, not Analyzer | |||
if pathMapping.Fields[0].DateFormat != "" { | |||
return pathMapping.Fields[0].DateFormat | |||
} | |||
} | |||
} | |||
} | |||
return im.DefaultDateTimeParser | |||
} | |||
func (im *IndexMappingImpl) AnalyzeText(analyzerName string, text []byte) (analysis.TokenStream, error) { | |||
analyzer, err := im.cache.AnalyzerNamed(analyzerName) | |||
if err != nil { | |||
return nil, err | |||
} | |||
return analyzer.Analyze(text), nil | |||
} | |||
// FieldAnalyzer returns the name of the analyzer used on a field. | |||
func (im *IndexMappingImpl) FieldAnalyzer(field string) string { | |||
return im.AnalyzerNameForPath(field) | |||
} | |||
// DefaultSearchField returns the default field to search; wrapper to satisfy the IndexMapping interface. | |||
func (im *IndexMappingImpl) DefaultSearchField() string { | |||
return im.DefaultField | |||
} |
@@ -0,0 +1,49 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package mapping | |||
import ( | |||
"io/ioutil" | |||
"log" | |||
"github.com/blevesearch/bleve/analysis" | |||
"github.com/blevesearch/bleve/document" | |||
) | |||
// A Classifier is an interface describing any object | |||
// which knows how to identify its own type. | |||
type Classifier interface { | |||
Type() string | |||
} | |||
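// A minimal sketch of a type satisfying Classifier (illustrative only; not | |||
// part of the original file): documents of this type would be routed to the | |||
// "issue" type mapping by the index mapping. | |||
type exampleClassified struct{} | |||
func (exampleClassified) Type() string { return "issue" } | |||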
var logger = log.New(ioutil.Discard, "bleve mapping ", log.LstdFlags) | |||
// SetLog sets the logger used for logging. | |||
// By default log messages are sent to ioutil.Discard. | |||
func SetLog(l *log.Logger) { | |||
logger = l | |||
} | |||
// An IndexMapping controls how source objects are mapped into indexed documents. | |||
type IndexMapping interface { | |||
MapDocument(doc *document.Document, data interface{}) error | |||
Validate() error | |||
DateTimeParserNamed(name string) analysis.DateTimeParser | |||
DefaultSearchField() string | |||
AnalyzerNameForPath(path string) string | |||
AnalyzerNamed(name string) *analysis.Analyzer | |||
} |
@@ -0,0 +1,89 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package mapping | |||
import ( | |||
"reflect" | |||
"strings" | |||
) | |||
func lookupPropertyPath(data interface{}, path string) interface{} { | |||
pathParts := decodePath(path) | |||
current := data | |||
for _, part := range pathParts { | |||
current = lookupPropertyPathPart(current, part) | |||
if current == nil { | |||
break | |||
} | |||
} | |||
return current | |||
} | |||
func lookupPropertyPathPart(data interface{}, part string) interface{} { | |||
val := reflect.ValueOf(data) | |||
typ := val.Type() | |||
switch typ.Kind() { | |||
case reflect.Map: | |||
// FIXME can add support for other map keys in the future | |||
if typ.Key().Kind() == reflect.String { | |||
key := reflect.ValueOf(part) | |||
entry := val.MapIndex(key) | |||
if entry.IsValid() { | |||
return entry.Interface() | |||
} | |||
} | |||
case reflect.Struct: | |||
field := val.FieldByName(part) | |||
if field.IsValid() && field.CanInterface() { | |||
return field.Interface() | |||
} | |||
case reflect.Ptr: | |||
ptrElem := val.Elem() | |||
if ptrElem.IsValid() && ptrElem.CanInterface() { | |||
return lookupPropertyPathPart(ptrElem.Interface(), part) | |||
} | |||
} | |||
return nil | |||
} | |||
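// Hedged example of the traversal above (illustrative only; not part of the | |||
// original file): each dot-separated part descends one level through maps, | |||
// structs, or pointers. | |||
func examplePathLookup() interface{} { | |||
data := map[string]interface{}{ | |||
"repo": map[string]interface{}{"name": "gitea"}, | |||
} | |||
return lookupPropertyPath(data, "repo.name") // "gitea" | |||
} | |||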
const pathSeparator = "." | |||
func decodePath(path string) []string { | |||
return strings.Split(path, pathSeparator) | |||
} | |||
func encodePath(pathElements []string) string { | |||
return strings.Join(pathElements, pathSeparator) | |||
} | |||
func mustString(data interface{}) (string, bool) { | |||
if data != nil { | |||
str, ok := data.(string) | |||
if ok { | |||
return str, true | |||
} | |||
} | |||
return "", false | |||
} | |||
// parseTagName extracts the field name from a struct tag | |||
func parseTagName(tag string) string { | |||
if idx := strings.Index(tag, ","); idx != -1 { | |||
return tag[:idx] | |||
} | |||
return tag | |||
} |
@@ -0,0 +1,34 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package numeric | |||
import ( | |||
"math" | |||
) | |||
func Float64ToInt64(f float64) int64 { | |||
fasint := int64(math.Float64bits(f)) | |||
if fasint < 0 { | |||
fasint = fasint ^ 0x7fffffffffffffff | |||
} | |||
return fasint | |||
} | |||
func Int64ToFloat64(i int64) float64 { | |||
if i < 0 { | |||
i ^= 0x7fffffffffffffff | |||
} | |||
return math.Float64frombits(uint64(i)) | |||
} |
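// Illustrative sketch (not part of the original file): the sign-bit flip above | |||
// makes the int64 image order-preserving and exactly invertible, which is what | |||
// lets numeric values be range-compared once encoded. | |||
func exampleSortableOrder() bool { | |||
return Float64ToInt64(-2.5) < Float64ToInt64(1.0) && | |||
Int64ToFloat64(Float64ToInt64(1.0)) == 1.0 // round-trips exactly | |||
} | |||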
@@ -0,0 +1,92 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package numeric | |||
import "fmt" | |||
const ShiftStartInt64 byte = 0x20 | |||
// PrefixCoded is a byte array encoding of | |||
// 64-bit numeric values shifted by 0-63 bits | |||
type PrefixCoded []byte | |||
func NewPrefixCodedInt64(in int64, shift uint) (PrefixCoded, error) { | |||
if shift > 63 { | |||
return nil, fmt.Errorf("cannot shift %d, must be between 0 and 63", shift) | |||
} | |||
nChars := ((63 - shift) / 7) + 1 | |||
rv := make(PrefixCoded, nChars+1) | |||
rv[0] = ShiftStartInt64 + byte(shift) | |||
sortableBits := int64(uint64(in) ^ 0x8000000000000000) | |||
sortableBits = int64(uint64(sortableBits) >> shift) | |||
for nChars > 0 { | |||
// Store 7 bits per byte for compatibility | |||
// with UTF-8 encoding of terms | |||
rv[nChars] = byte(sortableBits & 0x7f) | |||
nChars-- | |||
sortableBits = int64(uint64(sortableBits) >> 7) | |||
} | |||
return rv, nil | |||
} | |||
func MustNewPrefixCodedInt64(in int64, shift uint) PrefixCoded { | |||
rv, err := NewPrefixCodedInt64(in, shift) | |||
if err != nil { | |||
panic(err) | |||
} | |||
return rv | |||
} | |||
// Shift returns the number of bits the encoded value was shifted by. | |||
// It returns an error for an empty or invalid prefix coded value. | |||
func (p PrefixCoded) Shift() (uint, error) { | |||
if len(p) > 0 { | |||
shift := p[0] - ShiftStartInt64 | |||
// shift is an unsigned byte: a first byte below ShiftStartInt64 wraps past 63 | |||
if shift <= 63 { | |||
return uint(shift), nil | |||
} | |||
} | |||
return 0, fmt.Errorf("invalid prefix coded value") | |||
} | |||
func (p PrefixCoded) Int64() (int64, error) { | |||
shift, err := p.Shift() | |||
if err != nil { | |||
return 0, err | |||
} | |||
var sortableBits int64 | |||
for _, inbyte := range p[1:] { | |||
sortableBits <<= 7 | |||
sortableBits |= int64(inbyte) | |||
} | |||
return int64(uint64((sortableBits << shift)) ^ 0x8000000000000000), nil | |||
} | |||
func ValidPrefixCodedTerm(p string) (bool, int) { | |||
if len(p) > 0 { | |||
if p[0] < ShiftStartInt64 || p[0] > ShiftStartInt64+63 { | |||
return false, 0 | |||
} | |||
shift := p[0] - ShiftStartInt64 | |||
nChars := ((63 - int(shift)) / 7) + 1 | |||
if len(p) != nChars+1 { | |||
return false, 0 | |||
} | |||
return true, int(shift) | |||
} | |||
return false, 0 | |||
} |
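// Illustrative sketch (not part of the original file): the same value encoded | |||
// at increasing shifts yields shorter, coarser terms; numeric range queries | |||
// combine a few coarse terms with fine terms at the range edges. | |||
func examplePrefixLevels() []PrefixCoded { | |||
levels := make([]PrefixCoded, 0, 4) | |||
for shift := uint(0); shift <= 48; shift += 16 { | |||
levels = append(levels, MustNewPrefixCodedInt64(Float64ToInt64(3.14), shift)) | |||
} | |||
return levels | |||
} | |||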
@@ -0,0 +1,186 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package bleve | |||
import ( | |||
"time" | |||
"github.com/blevesearch/bleve/search/query" | |||
) | |||
// NewBoolFieldQuery creates a new Query for boolean fields | |||
func NewBoolFieldQuery(val bool) *query.BoolFieldQuery { | |||
return query.NewBoolFieldQuery(val) | |||
} | |||
// NewBooleanQuery creates a compound Query composed | |||
// of several other Query objects. | |||
// These other query objects are added using the | |||
// AddMust() AddShould() and AddMustNot() methods. | |||
// Result documents must satisfy ALL of the | |||
// must Queries. | |||
// Result documents must satisfy NONE of the must not | |||
// Queries. | |||
// Result documents that ALSO satisfy any of the should | |||
// Queries will score higher. | |||
func NewBooleanQuery() *query.BooleanQuery { | |||
return query.NewBooleanQuery(nil, nil, nil) | |||
} | |||
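// Hedged usage sketch (illustrative only; not part of the original file): | |||
// composing the clause types described above. | |||
func exampleBooleanQuery() *query.BooleanQuery { | |||
q := NewBooleanQuery() | |||
q.AddMust(NewTermQuery("indexer")) // required | |||
q.AddMustNot(NewTermQuery("closed")) // forbidden | |||
q.AddShould(NewMatchQuery("bleve")) // optional, boosts score when matched | |||
return q | |||
} | |||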
// NewConjunctionQuery creates a new compound Query. | |||
// Result documents must satisfy all of the queries. | |||
func NewConjunctionQuery(conjuncts ...query.Query) *query.ConjunctionQuery { | |||
return query.NewConjunctionQuery(conjuncts) | |||
} | |||
// NewDateRangeQuery creates a new Query for ranges | |||
// of date values. | |||
// Date strings are parsed using the DateTimeParser configured in the | |||
// top-level config.QueryDateTimeParser | |||
// Either endpoint, but not both, may be nil. | |||
func NewDateRangeQuery(start, end time.Time) *query.DateRangeQuery { | |||
return query.NewDateRangeQuery(start, end) | |||
} | |||
// NewDateRangeInclusiveQuery creates a new Query for ranges | |||
// of date values. | |||
// Date strings are parsed using the DateTimeParser configured in the | |||
// top-level config.QueryDateTimeParser | |||
// Either endpoint, but not both, may be nil. | |||
// startInclusive and endInclusive control inclusion of the endpoints. | |||
func NewDateRangeInclusiveQuery(start, end time.Time, startInclusive, endInclusive *bool) *query.DateRangeQuery { | |||
return query.NewDateRangeInclusiveQuery(start, end, startInclusive, endInclusive) | |||
} | |||
// NewDisjunctionQuery creates a new compound Query. | |||
// Result documents satisfy at least one Query. | |||
func NewDisjunctionQuery(disjuncts ...query.Query) *query.DisjunctionQuery { | |||
return query.NewDisjunctionQuery(disjuncts) | |||
} | |||
// NewDocIDQuery creates a new Query object returning indexed documents among | |||
// the specified set. Combine it with ConjunctionQuery to restrict the scope of | |||
// other queries output. | |||
func NewDocIDQuery(ids []string) *query.DocIDQuery { | |||
return query.NewDocIDQuery(ids) | |||
} | |||
// NewFuzzyQuery creates a new Query which finds | |||
// documents containing terms within a specific | |||
// fuzziness of the specified term. | |||
// The default fuzziness is 1. | |||
// | |||
// The current implementation uses Levenshtein edit | |||
// distance as the fuzziness metric. | |||
func NewFuzzyQuery(term string) *query.FuzzyQuery { | |||
return query.NewFuzzyQuery(term) | |||
} | |||
// NewMatchAllQuery creates a Query which will | |||
// match all documents in the index. | |||
func NewMatchAllQuery() *query.MatchAllQuery { | |||
return query.NewMatchAllQuery() | |||
} | |||
// NewMatchNoneQuery creates a Query which will not | |||
// match any documents in the index. | |||
func NewMatchNoneQuery() *query.MatchNoneQuery { | |||
return query.NewMatchNoneQuery() | |||
} | |||
// NewMatchPhraseQuery creates a new Query object | |||
// for matching phrases in the index. | |||
// An Analyzer is chosen based on the field. | |||
// Input text is analyzed using this analyzer. | |||
// Token terms resulting from this analysis are | |||
// used to build a search phrase. Result documents | |||
// must match this phrase. Queried field must have been indexed with | |||
// IncludeTermVectors set to true. | |||
func NewMatchPhraseQuery(matchPhrase string) *query.MatchPhraseQuery { | |||
return query.NewMatchPhraseQuery(matchPhrase) | |||
} | |||
// NewMatchQuery creates a Query for matching text. | |||
// An Analyzer is chosen based on the field. | |||
// Input text is analyzed using this analyzer. | |||
// Token terms resulting from this analysis are | |||
// used to perform term searches. Result documents | |||
// must satisfy at least one of these term searches. | |||
func NewMatchQuery(match string) *query.MatchQuery { | |||
return query.NewMatchQuery(match) | |||
} | |||
// NewNumericRangeQuery creates a new Query for ranges | |||
// of numeric values. | |||
// Either endpoint, but not both, may be nil. | |||
// The minimum value is inclusive. | |||
// The maximum value is exclusive. | |||
func NewNumericRangeQuery(min, max *float64) *query.NumericRangeQuery { | |||
return query.NewNumericRangeQuery(min, max) | |||
} | |||
// NewNumericRangeInclusiveQuery creates a new Query for ranges | |||
// of numeric values. | |||
// Either endpoint, but not both, may be nil. | |||
// Control endpoint inclusion with inclusiveMin, inclusiveMax. | |||
func NewNumericRangeInclusiveQuery(min, max *float64, minInclusive, maxInclusive *bool) *query.NumericRangeQuery { | |||
return query.NewNumericRangeInclusiveQuery(min, max, minInclusive, maxInclusive) | |||
} | |||
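// Hedged sketch (illustrative only; not part of the original file): a | |||
// half-open range [1.0, +inf) using the default inclusive-min/exclusive-max | |||
// semantics described above. | |||
func exampleNumericRange() *query.NumericRangeQuery { | |||
min := 1.0 | |||
return NewNumericRangeQuery(&min, nil) | |||
} | |||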
// NewPhraseQuery creates a new Query for finding | |||
// exact term phrases in the index. | |||
// The provided terms must exist in the correct | |||
// order, at the correct index offsets, in the | |||
// specified field. Queried field must have been indexed with | |||
// IncludeTermVectors set to true. | |||
func NewPhraseQuery(terms []string, field string) *query.PhraseQuery { | |||
return query.NewPhraseQuery(terms, field) | |||
} | |||
// NewPrefixQuery creates a new Query which finds | |||
// documents containing terms that start with the | |||
// specified prefix. | |||
func NewPrefixQuery(prefix string) *query.PrefixQuery { | |||
return query.NewPrefixQuery(prefix) | |||
} | |||
// NewRegexpQuery creates a new Query which finds | |||
// documents containing terms that match the | |||
// specified regular expression. | |||
func NewRegexpQuery(regexp string) *query.RegexpQuery { | |||
return query.NewRegexpQuery(regexp) | |||
} | |||
// NewQueryStringQuery creates a new Query used for | |||
// finding documents that satisfy a query string. The | |||
// query string is a small query language for humans. | |||
func NewQueryStringQuery(q string) *query.QueryStringQuery { | |||
return query.NewQueryStringQuery(q) | |||
} | |||
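// Hedged sketch of the query string syntax (illustrative only; field names | |||
// are hypothetical): '+' requires a clause, '-' excludes one, and bare terms | |||
// are optional should-clauses. | |||
func exampleQueryString() *query.QueryStringQuery { | |||
return NewQueryStringQuery(`+title:indexer -status:closed bleve`) | |||
} | |||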
// NewTermQuery creates a new Query for finding an | |||
// exact term match in the index. | |||
func NewTermQuery(term string) *query.TermQuery { | |||
return query.NewTermQuery(term) | |||
} | |||
// NewWildcardQuery creates a new Query which finds | |||
// documents containing terms that match the | |||
// specified wildcard. In the wildcard pattern '*' | |||
// will match any sequence of 0 or more characters, | |||
// and '?' will match any single character. | |||
func NewWildcardQuery(wildcard string) *query.WildcardQuery { | |||
return query.NewWildcardQuery(wildcard) | |||
} |
@@ -0,0 +1,89 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package registry | |||
import ( | |||
"fmt" | |||
"github.com/blevesearch/bleve/analysis" | |||
) | |||
// RegisterAnalyzer registers an analyzer constructor under the given name, | |||
// panicking if the name is already taken. | |||
func RegisterAnalyzer(name string, constructor AnalyzerConstructor) { | |||
_, exists := analyzers[name] | |||
if exists { | |||
panic(fmt.Errorf("attempted to register duplicate analyzer named '%s'", name)) | |||
} | |||
analyzers[name] = constructor | |||
} | |||
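// Hedged registration sketch (illustrative only; not part of the original | |||
// file — the analyzer returned here is an empty placeholder, not a working | |||
// analyzer): | |||
func exampleRegisterAnalyzer() { | |||
RegisterAnalyzer("example_analyzer", func(config map[string]interface{}, cache *Cache) (*analysis.Analyzer, error) { | |||
return &analysis.Analyzer{}, nil | |||
}) | |||
} | |||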
type AnalyzerConstructor func(config map[string]interface{}, cache *Cache) (*analysis.Analyzer, error) | |||
type AnalyzerRegistry map[string]AnalyzerConstructor | |||
type AnalyzerCache struct { | |||
*ConcurrentCache | |||
} | |||
func NewAnalyzerCache() *AnalyzerCache { | |||
return &AnalyzerCache{ | |||
NewConcurrentCache(), | |||
} | |||
} | |||
// AnalyzerBuild constructs the analyzer registered under name; it serves as | |||
// the CacheBuild function for analyzer caches. | |||
func AnalyzerBuild(name string, config map[string]interface{}, cache *Cache) (interface{}, error) { | |||
cons, registered := analyzers[name] | |||
if !registered { | |||
return nil, fmt.Errorf("no analyzer with name or type '%s' registered", name) | |||
} | |||
analyzer, err := cons(config, cache) | |||
if err != nil { | |||
return nil, fmt.Errorf("error building analyzer: %v", err) | |||
} | |||
return analyzer, nil | |||
} | |||
func (c *AnalyzerCache) AnalyzerNamed(name string, cache *Cache) (*analysis.Analyzer, error) { | |||
item, err := c.ItemNamed(name, cache, AnalyzerBuild) | |||
if err != nil { | |||
return nil, err | |||
} | |||
return item.(*analysis.Analyzer), nil | |||
} | |||
func (c *AnalyzerCache) DefineAnalyzer(name string, typ string, config map[string]interface{}, cache *Cache) (*analysis.Analyzer, error) { | |||
item, err := c.DefineItem(name, typ, config, cache, AnalyzerBuild) | |||
if err != nil { | |||
if err == ErrAlreadyDefined { | |||
return nil, fmt.Errorf("analyzer named '%s' already defined", name) | |||
} | |||
return nil, err | |||
} | |||
return item.(*analysis.Analyzer), nil | |||
} | |||
func AnalyzerTypesAndInstances() ([]string, []string) { | |||
emptyConfig := map[string]interface{}{} | |||
emptyCache := NewCache() | |||
var types []string | |||
var instances []string | |||
for name, cons := range analyzers { | |||
_, err := cons(emptyConfig, emptyCache) | |||
if err == nil { | |||
instances = append(instances, name) | |||
} else { | |||
types = append(types, name) | |||
} | |||
} | |||
return types, instances | |||
} |
@@ -0,0 +1,87 @@ | |||
// Copyright (c) 2016 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package registry | |||
import ( | |||
"fmt" | |||
"sync" | |||
) | |||
// ErrAlreadyDefined is returned when an item is defined under a name that is already in use. | |||
var ErrAlreadyDefined = fmt.Errorf("item already defined") | |||
// CacheBuild constructs a named item on behalf of the cache. | |||
type CacheBuild func(name string, config map[string]interface{}, cache *Cache) (interface{}, error) | |||
// ConcurrentCache is a registry cache that is safe for concurrent use. | |||
type ConcurrentCache struct { | |||
mutex sync.RWMutex | |||
data map[string]interface{} | |||
} | |||
func NewConcurrentCache() *ConcurrentCache { | |||
return &ConcurrentCache{ | |||
data: make(map[string]interface{}), | |||
} | |||
} | |||
func (c *ConcurrentCache) ItemNamed(name string, cache *Cache, build CacheBuild) (interface{}, error) { | |||
c.mutex.RLock() | |||
item, cached := c.data[name] | |||
if cached { | |||
c.mutex.RUnlock() | |||
return item, nil | |||
} | |||
// give up read lock | |||
c.mutex.RUnlock() | |||
// try to build it | |||
newItem, err := build(name, nil, cache) | |||
if err != nil { | |||
return nil, err | |||
} | |||
// acquire write lock | |||
c.mutex.Lock() | |||
defer c.mutex.Unlock() | |||
// check again because it could have been created while trading locks | |||
item, cached = c.data[name] | |||
if cached { | |||
return item, nil | |||
} | |||
c.data[name] = newItem | |||
return newItem, nil | |||
} | |||
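// Editorial note on the pattern above (not part of the original file): the | |||
// build runs with no lock held, so two goroutines may construct the same item | |||
// concurrently; the re-check under the write lock lets the first stored | |||
// result win, and the duplicate build is simply discarded. | |||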
func (c *ConcurrentCache) DefineItem(name string, typ string, config map[string]interface{}, cache *Cache, build CacheBuild) (interface{}, error) { | |||
c.mutex.RLock() | |||
_, cached := c.data[name] | |||
if cached { | |||
c.mutex.RUnlock() | |||
return nil, ErrAlreadyDefined | |||
} | |||
// give up read lock so others lookups can proceed | |||
c.mutex.RUnlock() | |||
// really not there, try to build it | |||
newItem, err := build(typ, config, cache) | |||
if err != nil { | |||
return nil, err | |||
} | |||
// now we've built it, acquire lock | |||
c.mutex.Lock() | |||
defer c.mutex.Unlock() | |||
// check again because it could have been created while trading locks | |||
_, cached = c.data[name] | |||
if cached { | |||
return nil, ErrAlreadyDefined | |||
} | |||
c.data[name] = newItem | |||
return newItem, nil | |||
} |