diff options
Diffstat (limited to 'vendor/github.com/blevesearch/bleve/index/upsidedown/upsidedown.go')
-rw-r--r-- | vendor/github.com/blevesearch/bleve/index/upsidedown/upsidedown.go | 121 |
1 files changed, 78 insertions, 43 deletions
diff --git a/vendor/github.com/blevesearch/bleve/index/upsidedown/upsidedown.go b/vendor/github.com/blevesearch/bleve/index/upsidedown/upsidedown.go index 763c80d675..a8ef538eed 100644 --- a/vendor/github.com/blevesearch/bleve/index/upsidedown/upsidedown.go +++ b/vendor/github.com/blevesearch/bleve/index/upsidedown/upsidedown.go @@ -45,7 +45,7 @@ const RowBufferSize = 4 * 1024 var VersionKey = []byte{'v'} -const Version uint8 = 5 +const Version uint8 = 7 var IncompatibleVersion = fmt.Errorf("incompatible version, %d is supported", Version) @@ -499,44 +499,65 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) { func (udc *UpsideDownCouch) mergeOldAndNew(backIndexRow *BackIndexRow, rows []index.IndexRow) (addRows []UpsideDownCouchRow, updateRows []UpsideDownCouchRow, deleteRows []UpsideDownCouchRow) { addRows = make([]UpsideDownCouchRow, 0, len(rows)) + + if backIndexRow == nil { + addRows = addRows[0:len(rows)] + for i, row := range rows { + addRows[i] = row + } + return addRows, nil, nil + } + updateRows = make([]UpsideDownCouchRow, 0, len(rows)) deleteRows = make([]UpsideDownCouchRow, 0, len(rows)) - existingTermKeys := make(map[string]bool) - for _, key := range backIndexRow.AllTermKeys() { - existingTermKeys[string(key)] = true + var existingTermKeys map[string]struct{} + backIndexTermKeys := backIndexRow.AllTermKeys() + if len(backIndexTermKeys) > 0 { + existingTermKeys = make(map[string]struct{}, len(backIndexTermKeys)) + for _, key := range backIndexTermKeys { + existingTermKeys[string(key)] = struct{}{} + } } - existingStoredKeys := make(map[string]bool) - for _, key := range backIndexRow.AllStoredKeys() { - existingStoredKeys[string(key)] = true + var existingStoredKeys map[string]struct{} + backIndexStoredKeys := backIndexRow.AllStoredKeys() + if len(backIndexStoredKeys) > 0 { + existingStoredKeys = make(map[string]struct{}, len(backIndexStoredKeys)) + for _, key := range backIndexStoredKeys { + existingStoredKeys[string(key)] = struct{}{} + } } keyBuf := GetRowBuffer() for _, row := range rows { switch row := row.(type) { case *TermFrequencyRow: - if row.KeySize() > len(keyBuf) { - keyBuf = make([]byte, row.KeySize()) - } - keySize, _ := row.KeyTo(keyBuf) - if _, ok := existingTermKeys[string(keyBuf[:keySize])]; ok { - updateRows = append(updateRows, row) - delete(existingTermKeys, string(keyBuf[:keySize])) - } else { - addRows = append(addRows, row) + if existingTermKeys != nil { + if row.KeySize() > len(keyBuf) { + keyBuf = make([]byte, row.KeySize()) + } + keySize, _ := row.KeyTo(keyBuf) + if _, ok := existingTermKeys[string(keyBuf[:keySize])]; ok { + updateRows = append(updateRows, row) + delete(existingTermKeys, string(keyBuf[:keySize])) + continue + } } + addRows = append(addRows, row) case *StoredRow: - if row.KeySize() > len(keyBuf) { - keyBuf = make([]byte, row.KeySize()) - } - keySize, _ := row.KeyTo(keyBuf) - if _, ok := existingStoredKeys[string(keyBuf[:keySize])]; ok { - updateRows = append(updateRows, row) - delete(existingStoredKeys, string(keyBuf[:keySize])) - } else { - addRows = append(addRows, row) + if existingStoredKeys != nil { + if row.KeySize() > len(keyBuf) { + keyBuf = make([]byte, row.KeySize()) + } + keySize, _ := row.KeyTo(keyBuf) + if _, ok := existingStoredKeys[string(keyBuf[:keySize])]; ok { + updateRows = append(updateRows, row) + delete(existingStoredKeys, string(keyBuf[:keySize])) + continue + } } + addRows = append(addRows, row) default: updateRows = append(updateRows, row) } @@ -583,33 +604,41 @@ func encodeFieldType(f document.Field) byte { fieldType = 'd' case *document.BooleanField: fieldType = 'b' + case *document.GeoPointField: + fieldType = 'g' case *document.CompositeField: fieldType = 'c' } return fieldType } -func (udc *UpsideDownCouch) indexField(docID []byte, includeTermVectors bool, fieldIndex uint16, fieldLength int, tokenFreqs analysis.TokenFrequencies, rows []index.IndexRow, backIndexTermEntries []*BackIndexTermEntry) ([]index.IndexRow, []*BackIndexTermEntry) { +func (udc *UpsideDownCouch) indexField(docID []byte, includeTermVectors bool, fieldIndex uint16, fieldLength int, tokenFreqs analysis.TokenFrequencies, rows []index.IndexRow, backIndexTermsEntries []*BackIndexTermsEntry) ([]index.IndexRow, []*BackIndexTermsEntry) { fieldNorm := float32(1.0 / math.Sqrt(float64(fieldLength))) + termFreqRows := make([]TermFrequencyRow, len(tokenFreqs)) + termFreqRowsUsed := 0 + + terms := make([]string, 0, len(tokenFreqs)) for k, tf := range tokenFreqs { - var termFreqRow *TermFrequencyRow + termFreqRow := &termFreqRows[termFreqRowsUsed] + termFreqRowsUsed++ + + InitTermFrequencyRow(termFreqRow, tf.Term, fieldIndex, docID, + uint64(frequencyFromTokenFreq(tf)), fieldNorm) + if includeTermVectors { - var tv []*TermVector - tv, rows = udc.termVectorsFromTokenFreq(fieldIndex, tf, rows) - termFreqRow = NewTermFrequencyRowWithTermVectors(tf.Term, fieldIndex, docID, uint64(frequencyFromTokenFreq(tf)), fieldNorm, tv) - } else { - termFreqRow = NewTermFrequencyRow(tf.Term, fieldIndex, docID, uint64(frequencyFromTokenFreq(tf)), fieldNorm) + termFreqRow.vectors, rows = udc.termVectorsFromTokenFreq(fieldIndex, tf, rows) } // record the back index entry - backIndexTermEntry := BackIndexTermEntry{Term: proto.String(k), Field: proto.Uint32(uint32(fieldIndex))} - backIndexTermEntries = append(backIndexTermEntries, &backIndexTermEntry) + terms = append(terms, k) rows = append(rows, termFreqRow) } + backIndexTermsEntry := BackIndexTermsEntry{Field: proto.Uint32(uint32(fieldIndex)), Terms: terms} + backIndexTermsEntries = append(backIndexTermsEntries, &backIndexTermsEntry) - return rows, backIndexTermEntries + return rows, backIndexTermsEntries } func (udc *UpsideDownCouch) Delete(id string) (err error) { @@ -682,9 +711,11 @@ func (udc *UpsideDownCouch) Delete(id string) (err error) { func (udc *UpsideDownCouch) deleteSingle(id string, backIndexRow *BackIndexRow, deleteRows []UpsideDownCouchRow) []UpsideDownCouchRow { idBytes := []byte(id) - for _, backIndexEntry := range backIndexRow.termEntries { - tfr := NewTermFrequencyRow([]byte(*backIndexEntry.Term), uint16(*backIndexEntry.Field), idBytes, 0, 0) - deleteRows = append(deleteRows, tfr) + for _, backIndexEntry := range backIndexRow.termsEntries { + for i := range backIndexEntry.Terms { + tfr := NewTermFrequencyRow([]byte(backIndexEntry.Terms[i]), uint16(*backIndexEntry.Field), idBytes, 0, 0) + deleteRows = append(deleteRows, tfr) + } } for _, se := range backIndexRow.storedEntries { sf := NewStoredRow(idBytes, uint16(*se.Field), se.ArrayPositions, 'x', nil) @@ -706,6 +737,8 @@ func decodeFieldType(typ byte, name string, pos []uint64, value []byte) document return document.NewDateTimeFieldFromBytes(name, pos, value) case 'b': return document.NewBooleanFieldFromBytes(name, pos, value) + case 'g': + return document.NewGeoPointFieldFromBytes(name, pos, value) } return nil } @@ -715,6 +748,7 @@ func frequencyFromTokenFreq(tf *analysis.TokenFreq) int { } func (udc *UpsideDownCouch) termVectorsFromTokenFreq(field uint16, tf *analysis.TokenFreq, rows []index.IndexRow) ([]*TermVector, []index.IndexRow) { + a := make([]TermVector, len(tf.Locations)) rv := make([]*TermVector, len(tf.Locations)) for i, l := range tf.Locations { @@ -727,14 +761,14 @@ func (udc *UpsideDownCouch) termVectorsFromTokenFreq(field uint16, tf *analysis. rows = append(rows, newFieldRow) } } - tv := TermVector{ + a[i] = TermVector{ field: fieldIndex, arrayPositions: l.ArrayPositions, pos: uint64(l.Position), start: uint64(l.Start), end: uint64(l.End), } - rv[i] = &tv + rv[i] = &a[i] } return rv, rows @@ -745,18 +779,19 @@ func (udc *UpsideDownCouch) termFieldVectorsFromTermVectors(in []*TermVector) [] return nil } + a := make([]index.TermFieldVector, len(in)) rv := make([]*index.TermFieldVector, len(in)) for i, tv := range in { fieldName := udc.fieldCache.FieldIndexed(tv.field) - tfv := index.TermFieldVector{ + a[i] = index.TermFieldVector{ Field: fieldName, ArrayPositions: tv.arrayPositions, Pos: tv.pos, Start: tv.start, End: tv.end, } - rv[i] = &tfv + rv[i] = &a[i] } return rv } @@ -1008,7 +1043,7 @@ func init() { func backIndexRowForDoc(kvreader store.KVReader, docID index.IndexInternalID) (*BackIndexRow, error) { // use a temporary row structure to build key - tempRow := &BackIndexRow{ + tempRow := BackIndexRow{ doc: docID, } |