diff options
160 files changed, 37644 insertions, 66 deletions
diff --git a/Gopkg.lock b/Gopkg.lock index 65cdf7efa3..ba21eca95c 100644 --- a/Gopkg.lock +++ b/Gopkg.lock @@ -18,6 +18,14 @@ revision = "d5a42771e7e851e8a89c5c6ffa0f5b075342f9df" [[projects]] + digest = "1:5d72bbcc9c8667b11c3dc3cbe681c5a6f71e5096744c0bf7726ab5c6425d5dc4" + name = "github.com/BurntSushi/toml" + packages = ["."] + pruneopts = "NUT" + revision = "3012a1dbe2e4bd1391d42b32f0577cb7bbc7f005" + version = "v0.3.1" + +[[projects]] digest = "1:3fcef06a1a6561955c94af6c7757a6fa37605eb653f0d06ab960e5bb80092195" name = "github.com/PuerkitoBio/goquery" packages = ["."] @@ -187,6 +195,28 @@ [[projects]] branch = "master" + digest = "1:eb205556fe75307c6d2b58d4159e7c2da23e2666481d352c66d4055bebf45a3c" + name = "github.com/couchbase/gomemcached" + packages = [ + ".", + "client", + ] + pruneopts = "NUT" + revision = "5125a94a666c83cb9b7a60907833cd320b84c20f" + +[[projects]] + branch = "master" + digest = "1:ea03e12e246f7708a7b7ab3ad04e96d21ce73f48bb56258bc2bffeed474212e6" + name = "github.com/couchbase/goutils" + packages = [ + "logging", + "scramsha", + ] + pruneopts = "NUT" + revision = "e865a1461c8ac0032bd37e2d4dab3289faea3873" + +[[projects]] + branch = "master" digest = "1:82e1ad11d777f7bff9a1fc678a8a534a318f85e5026a8a4d6f4a94a6b0678bb6" name = "github.com/couchbase/vellum" packages = [ @@ -198,6 +228,14 @@ revision = "eb6ae3743b3f300f2136f83ca78c08cc071edbd4" [[projects]] + branch = "master" + digest = "1:df592f4b82b993fcac270862376c34210776b8b0334a0f59f4d9d80467713ffa" + name = "github.com/couchbaselabs/go-couchbase" + packages = ["."] + pruneopts = "NUT" + revision = "d904413d884d1fb849e2ad8834619f661761ef57" + +[[projects]] digest = "1:a2c1d0e43bd3baaa071d1b9ed72c27d78169b2b269f71c105ac4ba34b1be4a39" name = "github.com/davecgh/go-spew" packages = ["spew"] @@ -358,14 +396,19 @@ [[projects]] branch = "master" - digest = "1:8fea5718d84af17762195beb6fe92a0d6c1048452a1dbc464d227f12e0cff0cc" + digest = 
"1:a26b7b56aece087165b8db87afb05db8495252449553ca20d15f5a24202f36bc" name = "github.com/go-macaron/session" packages = [ ".", + "couchbase", + "memcache", + "mysql", + "nodb", + "postgres", "redis", ] pruneopts = "NUT" - revision = "330e4e4d8beb7b00111ac34539561f46f94c4458" + revision = "0a0a789bf1934e55fde19629869caa015a40c525" [[projects]] digest = "1:758d2371fcdee6d02565901b348729053c636055e67ef6e17aa466c7ff6cc57c" @@ -580,6 +623,28 @@ revision = "e3534c89ef969912856dfa39e56b09e58c5f5daf" [[projects]] + digest = "1:1e6a29ed1f189354030e3371f63ec58aacbc2bf232fd104c6e0d41174ac5af48" + name = "github.com/lunny/log" + packages = ["."] + pruneopts = "NUT" + revision = "7887c61bf0de75586961948b286be6f7d05d9f58" + version = "v0.1" + +[[projects]] + branch = "master" + digest = "1:683d835728cb95d176d423b522420eb5e4ec859b276bca18466476b82b3ebc4c" + name = "github.com/lunny/nodb" + packages = [ + ".", + "config", + "store", + "store/driver", + "store/goleveldb", + ] + pruneopts = "NUT" + revision = "fc1ef06ad4af0da31cdb87e3fa5ec084c67e6597" + +[[projects]] digest = "1:aa7dcd6a0db70d514821f8739d0a22e7df33b499d8d399cf15b2858d44f8319e" name = "github.com/markbates/goth" packages = [ @@ -683,6 +748,14 @@ version = "v1.0.0" [[projects]] + digest = "1:14715f705ff5dfe0ffd6571d7d201dd8e921030f8070321a79380d8ca4ec1a24" + name = "github.com/pkg/errors" + packages = ["."] + pruneopts = "NUT" + revision = "ba968bfe8b2f7e042a574c888954fccecfa385b4" + version = "v0.8.1" + +[[projects]] digest = "1:0028cb19b2e4c3112225cd871870f2d9cf49b9b4276531f03438a88e94be86fe" name = "github.com/pmezard/go-difflib" packages = ["difflib"] @@ -776,6 +849,14 @@ revision = "1dba4b3954bc059efc3991ec364f9f9a35f597d2" [[projects]] + branch = "master" + digest = "1:dbda803f21e60c38de7d9f884390f2ebbe234ce0c3d139b65bbb36b03a99d266" + name = "github.com/siddontang/go-snappy" + packages = ["snappy"] + pruneopts = "NUT" + revision = "d8f7bb82a96d89c1254e5a6c967134e1433c9ee2" + +[[projects]] digest = 
"1:89fd77d603a74a6540d60067debad9397865bf040955d907362c95d364baeba6" name = "github.com/src-d/gcfg" packages = [ @@ -806,6 +887,27 @@ [[projects]] branch = "master" + digest = "1:685fdfea42d825ebd39ee0994354b46c374cf2c2b2d97a41a8dee1807c6a9b62" + name = "github.com/syndtr/goleveldb" + packages = [ + "leveldb", + "leveldb/cache", + "leveldb/comparer", + "leveldb/errors", + "leveldb/filter", + "leveldb/iterator", + "leveldb/journal", + "leveldb/memdb", + "leveldb/opt", + "leveldb/storage", + "leveldb/table", + "leveldb/util", + ] + pruneopts = "NUT" + revision = "2f17a3356c6616cbfc4ae4c38147dc062a68fb0e" + +[[projects]] + branch = "master" digest = "1:3cb6dfe7cdece5716b1c3c3c0b5faf7fce2e83e2758e2baad2e9986d101980b8" name = "github.com/tinylib/msgp" packages = ["msgp"] @@ -1150,6 +1252,11 @@ "github.com/go-macaron/i18n", "github.com/go-macaron/inject", "github.com/go-macaron/session", + "github.com/go-macaron/session/couchbase", + "github.com/go-macaron/session/memcache", + "github.com/go-macaron/session/mysql", + "github.com/go-macaron/session/nodb", + "github.com/go-macaron/session/postgres", "github.com/go-macaron/session/redis", "github.com/go-macaron/toolbox", "github.com/go-sql-driver/mysql", diff --git a/docs/content/doc/advanced/config-cheat-sheet.en-us.md b/docs/content/doc/advanced/config-cheat-sheet.en-us.md index c2d22f1d83..8254316716 100644 --- a/docs/content/doc/advanced/config-cheat-sheet.en-us.md +++ b/docs/content/doc/advanced/config-cheat-sheet.en-us.md @@ -250,7 +250,7 @@ Values containing `#` or `;` must be quoted using `` ` `` or `"""`. ## Session (`session`) -- `PROVIDER`: **memory**: Session engine provider \[memory, file, redis, mysql\]. +- `PROVIDER`: **memory**: Session engine provider \[memory, file, redis, mysql, couchbase, memcache, nodb, postgres\]. - `PROVIDER_CONFIG`: **data/sessions**: For file, the root path; for others, the connection string. - `COOKIE_SECURE`: **false**: Enable this to force using HTTPS for all session access. 
- `COOKIE_NAME`: **i\_like\_gitea**: The name of the cookie used for the session ID. diff --git a/modules/setting/setting.go b/modules/setting/setting.go index a2fdbf5192..77f0725d00 100644 --- a/modules/setting/setting.go +++ b/modules/setting/setting.go @@ -31,7 +31,12 @@ import ( _ "github.com/go-macaron/cache/memcache" // memcache plugin for cache _ "github.com/go-macaron/cache/redis" "github.com/go-macaron/session" - _ "github.com/go-macaron/session/redis" // redis plugin for store session + _ "github.com/go-macaron/session/couchbase" // couchbase plugin for session store + _ "github.com/go-macaron/session/memcache" // memcache plugin for session store + _ "github.com/go-macaron/session/mysql" // mysql plugin for session store + _ "github.com/go-macaron/session/nodb" // nodb plugin for session store + _ "github.com/go-macaron/session/postgres" // postgres plugin for session store + _ "github.com/go-macaron/session/redis" // redis plugin for store session "github.com/go-xorm/core" shellquote "github.com/kballard/go-shellquote" version "github.com/mcuadros/go-version" @@ -1506,7 +1511,7 @@ func newCacheService() { func newSessionService() { SessionConfig.Provider = Cfg.Section("session").Key("PROVIDER").In("memory", - []string{"memory", "file", "redis", "mysql"}) + []string{"memory", "file", "redis", "mysql", "postgres", "couchbase", "memcache", "nodb"}) SessionConfig.ProviderConfig = strings.Trim(Cfg.Section("session").Key("PROVIDER_CONFIG").MustString(path.Join(AppDataPath, "sessions")), "\" ") if SessionConfig.Provider == "file" && !filepath.IsAbs(SessionConfig.ProviderConfig) { SessionConfig.ProviderConfig = path.Join(AppWorkPath, SessionConfig.ProviderConfig) diff --git a/vendor/github.com/BurntSushi/toml/COPYING b/vendor/github.com/BurntSushi/toml/COPYING new file mode 100644 index 0000000000..01b5743200 --- /dev/null +++ b/vendor/github.com/BurntSushi/toml/COPYING @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2013 TOML authors + +Permission is 
hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/vendor/github.com/BurntSushi/toml/cmd/toml-test-decoder/COPYING b/vendor/github.com/BurntSushi/toml/cmd/toml-test-decoder/COPYING new file mode 100644 index 0000000000..01b5743200 --- /dev/null +++ b/vendor/github.com/BurntSushi/toml/cmd/toml-test-decoder/COPYING @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2013 TOML authors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/vendor/github.com/BurntSushi/toml/cmd/toml-test-encoder/COPYING b/vendor/github.com/BurntSushi/toml/cmd/toml-test-encoder/COPYING new file mode 100644 index 0000000000..01b5743200 --- /dev/null +++ b/vendor/github.com/BurntSushi/toml/cmd/toml-test-encoder/COPYING @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2013 TOML authors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
diff --git a/vendor/github.com/BurntSushi/toml/cmd/tomlv/COPYING b/vendor/github.com/BurntSushi/toml/cmd/tomlv/COPYING new file mode 100644 index 0000000000..01b5743200 --- /dev/null +++ b/vendor/github.com/BurntSushi/toml/cmd/tomlv/COPYING @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2013 TOML authors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/vendor/github.com/BurntSushi/toml/decode.go b/vendor/github.com/BurntSushi/toml/decode.go new file mode 100644 index 0000000000..b0fd51d5b6 --- /dev/null +++ b/vendor/github.com/BurntSushi/toml/decode.go @@ -0,0 +1,509 @@ +package toml + +import ( + "fmt" + "io" + "io/ioutil" + "math" + "reflect" + "strings" + "time" +) + +func e(format string, args ...interface{}) error { + return fmt.Errorf("toml: "+format, args...) +} + +// Unmarshaler is the interface implemented by objects that can unmarshal a +// TOML description of themselves. 
+type Unmarshaler interface { + UnmarshalTOML(interface{}) error +} + +// Unmarshal decodes the contents of `p` in TOML format into a pointer `v`. +func Unmarshal(p []byte, v interface{}) error { + _, err := Decode(string(p), v) + return err +} + +// Primitive is a TOML value that hasn't been decoded into a Go value. +// When using the various `Decode*` functions, the type `Primitive` may +// be given to any value, and its decoding will be delayed. +// +// A `Primitive` value can be decoded using the `PrimitiveDecode` function. +// +// The underlying representation of a `Primitive` value is subject to change. +// Do not rely on it. +// +// N.B. Primitive values are still parsed, so using them will only avoid +// the overhead of reflection. They can be useful when you don't know the +// exact type of TOML data until run time. +type Primitive struct { + undecoded interface{} + context Key +} + +// DEPRECATED! +// +// Use MetaData.PrimitiveDecode instead. +func PrimitiveDecode(primValue Primitive, v interface{}) error { + md := MetaData{decoded: make(map[string]bool)} + return md.unify(primValue.undecoded, rvalue(v)) +} + +// PrimitiveDecode is just like the other `Decode*` functions, except it +// decodes a TOML value that has already been parsed. Valid primitive values +// can *only* be obtained from values filled by the decoder functions, +// including this method. (i.e., `v` may contain more `Primitive` +// values.) +// +// Meta data for primitive values is included in the meta data returned by +// the `Decode*` functions with one exception: keys returned by the Undecoded +// method will only reflect keys that were decoded. Namely, any keys hidden +// behind a Primitive will be considered undecoded. Executing this method will +// update the undecoded keys in the meta data. (See the example.) 
+func (md *MetaData) PrimitiveDecode(primValue Primitive, v interface{}) error { + md.context = primValue.context + defer func() { md.context = nil }() + return md.unify(primValue.undecoded, rvalue(v)) +} + +// Decode will decode the contents of `data` in TOML format into a pointer +// `v`. +// +// TOML hashes correspond to Go structs or maps. (Dealer's choice. They can be +// used interchangeably.) +// +// TOML arrays of tables correspond to either a slice of structs or a slice +// of maps. +// +// TOML datetimes correspond to Go `time.Time` values. +// +// All other TOML types (float, string, int, bool and array) correspond +// to the obvious Go types. +// +// An exception to the above rules is if a type implements the +// encoding.TextUnmarshaler interface. In this case, any primitive TOML value +// (floats, strings, integers, booleans and datetimes) will be converted to +// a byte string and given to the value's UnmarshalText method. See the +// Unmarshaler example for a demonstration with time duration strings. +// +// Key mapping +// +// TOML keys can map to either keys in a Go map or field names in a Go +// struct. The special `toml` struct tag may be used to map TOML keys to +// struct fields that don't match the key name exactly. (See the example.) +// A case insensitive match to struct names will be tried if an exact match +// can't be found. +// +// The mapping between TOML values and Go values is loose. That is, there +// may exist TOML values that cannot be placed into your representation, and +// there may be parts of your representation that do not correspond to +// TOML values. This loose mapping can be made stricter by using the IsDefined +// and/or Undecoded methods on the MetaData returned. +// +// This decoder will not handle cyclic types. If a cyclic type is passed, +// `Decode` will not terminate. 
+func Decode(data string, v interface{}) (MetaData, error) { + rv := reflect.ValueOf(v) + if rv.Kind() != reflect.Ptr { + return MetaData{}, e("Decode of non-pointer %s", reflect.TypeOf(v)) + } + if rv.IsNil() { + return MetaData{}, e("Decode of nil %s", reflect.TypeOf(v)) + } + p, err := parse(data) + if err != nil { + return MetaData{}, err + } + md := MetaData{ + p.mapping, p.types, p.ordered, + make(map[string]bool, len(p.ordered)), nil, + } + return md, md.unify(p.mapping, indirect(rv)) +} + +// DecodeFile is just like Decode, except it will automatically read the +// contents of the file at `fpath` and decode it for you. +func DecodeFile(fpath string, v interface{}) (MetaData, error) { + bs, err := ioutil.ReadFile(fpath) + if err != nil { + return MetaData{}, err + } + return Decode(string(bs), v) +} + +// DecodeReader is just like Decode, except it will consume all bytes +// from the reader and decode it for you. +func DecodeReader(r io.Reader, v interface{}) (MetaData, error) { + bs, err := ioutil.ReadAll(r) + if err != nil { + return MetaData{}, err + } + return Decode(string(bs), v) +} + +// unify performs a sort of type unification based on the structure of `rv`, +// which is the client representation. +// +// Any type mismatch produces an error. Finding a type that we don't know +// how to handle produces an unsupported type error. +func (md *MetaData) unify(data interface{}, rv reflect.Value) error { + + // Special case. Look for a `Primitive` value. + if rv.Type() == reflect.TypeOf((*Primitive)(nil)).Elem() { + // Save the undecoded data and the key context into the primitive + // value. + context := make(Key, len(md.context)) + copy(context, md.context) + rv.Set(reflect.ValueOf(Primitive{ + undecoded: data, + context: context, + })) + return nil + } + + // Special case. Unmarshaler Interface support. + if rv.CanAddr() { + if v, ok := rv.Addr().Interface().(Unmarshaler); ok { + return v.UnmarshalTOML(data) + } + } + + // Special case. 
Handle time.Time values specifically. + // TODO: Remove this code when we decide to drop support for Go 1.1. + // This isn't necessary in Go 1.2 because time.Time satisfies the encoding + // interfaces. + if rv.Type().AssignableTo(rvalue(time.Time{}).Type()) { + return md.unifyDatetime(data, rv) + } + + // Special case. Look for a value satisfying the TextUnmarshaler interface. + if v, ok := rv.Interface().(TextUnmarshaler); ok { + return md.unifyText(data, v) + } + // BUG(burntsushi) + // The behavior here is incorrect whenever a Go type satisfies the + // encoding.TextUnmarshaler interface but also corresponds to a TOML + // hash or array. In particular, the unmarshaler should only be applied + // to primitive TOML values. But at this point, it will be applied to + // all kinds of values and produce an incorrect error whenever those values + // are hashes or arrays (including arrays of tables). + + k := rv.Kind() + + // laziness + if k >= reflect.Int && k <= reflect.Uint64 { + return md.unifyInt(data, rv) + } + switch k { + case reflect.Ptr: + elem := reflect.New(rv.Type().Elem()) + err := md.unify(data, reflect.Indirect(elem)) + if err != nil { + return err + } + rv.Set(elem) + return nil + case reflect.Struct: + return md.unifyStruct(data, rv) + case reflect.Map: + return md.unifyMap(data, rv) + case reflect.Array: + return md.unifyArray(data, rv) + case reflect.Slice: + return md.unifySlice(data, rv) + case reflect.String: + return md.unifyString(data, rv) + case reflect.Bool: + return md.unifyBool(data, rv) + case reflect.Interface: + // we only support empty interfaces. 
+ if rv.NumMethod() > 0 { + return e("unsupported type %s", rv.Type()) + } + return md.unifyAnything(data, rv) + case reflect.Float32: + fallthrough + case reflect.Float64: + return md.unifyFloat64(data, rv) + } + return e("unsupported type %s", rv.Kind()) +} + +func (md *MetaData) unifyStruct(mapping interface{}, rv reflect.Value) error { + tmap, ok := mapping.(map[string]interface{}) + if !ok { + if mapping == nil { + return nil + } + return e("type mismatch for %s: expected table but found %T", + rv.Type().String(), mapping) + } + + for key, datum := range tmap { + var f *field + fields := cachedTypeFields(rv.Type()) + for i := range fields { + ff := &fields[i] + if ff.name == key { + f = ff + break + } + if f == nil && strings.EqualFold(ff.name, key) { + f = ff + } + } + if f != nil { + subv := rv + for _, i := range f.index { + subv = indirect(subv.Field(i)) + } + if isUnifiable(subv) { + md.decoded[md.context.add(key).String()] = true + md.context = append(md.context, key) + if err := md.unify(datum, subv); err != nil { + return err + } + md.context = md.context[0 : len(md.context)-1] + } else if f.name != "" { + // Bad user! No soup for you! 
+ return e("cannot write unexported field %s.%s", + rv.Type().String(), f.name) + } + } + } + return nil +} + +func (md *MetaData) unifyMap(mapping interface{}, rv reflect.Value) error { + tmap, ok := mapping.(map[string]interface{}) + if !ok { + if tmap == nil { + return nil + } + return badtype("map", mapping) + } + if rv.IsNil() { + rv.Set(reflect.MakeMap(rv.Type())) + } + for k, v := range tmap { + md.decoded[md.context.add(k).String()] = true + md.context = append(md.context, k) + + rvkey := indirect(reflect.New(rv.Type().Key())) + rvval := reflect.Indirect(reflect.New(rv.Type().Elem())) + if err := md.unify(v, rvval); err != nil { + return err + } + md.context = md.context[0 : len(md.context)-1] + + rvkey.SetString(k) + rv.SetMapIndex(rvkey, rvval) + } + return nil +} + +func (md *MetaData) unifyArray(data interface{}, rv reflect.Value) error { + datav := reflect.ValueOf(data) + if datav.Kind() != reflect.Slice { + if !datav.IsValid() { + return nil + } + return badtype("slice", data) + } + sliceLen := datav.Len() + if sliceLen != rv.Len() { + return e("expected array length %d; got TOML array of length %d", + rv.Len(), sliceLen) + } + return md.unifySliceArray(datav, rv) +} + +func (md *MetaData) unifySlice(data interface{}, rv reflect.Value) error { + datav := reflect.ValueOf(data) + if datav.Kind() != reflect.Slice { + if !datav.IsValid() { + return nil + } + return badtype("slice", data) + } + n := datav.Len() + if rv.IsNil() || rv.Cap() < n { + rv.Set(reflect.MakeSlice(rv.Type(), n, n)) + } + rv.SetLen(n) + return md.unifySliceArray(datav, rv) +} + +func (md *MetaData) unifySliceArray(data, rv reflect.Value) error { + sliceLen := data.Len() + for i := 0; i < sliceLen; i++ { + v := data.Index(i).Interface() + sliceval := indirect(rv.Index(i)) + if err := md.unify(v, sliceval); err != nil { + return err + } + } + return nil +} + +func (md *MetaData) unifyDatetime(data interface{}, rv reflect.Value) error { + if _, ok := data.(time.Time); ok { + 
rv.Set(reflect.ValueOf(data)) + return nil + } + return badtype("time.Time", data) +} + +func (md *MetaData) unifyString(data interface{}, rv reflect.Value) error { + if s, ok := data.(string); ok { + rv.SetString(s) + return nil + } + return badtype("string", data) +} + +func (md *MetaData) unifyFloat64(data interface{}, rv reflect.Value) error { + if num, ok := data.(float64); ok { + switch rv.Kind() { + case reflect.Float32: + fallthrough + case reflect.Float64: + rv.SetFloat(num) + default: + panic("bug") + } + return nil + } + return badtype("float", data) +} + +func (md *MetaData) unifyInt(data interface{}, rv reflect.Value) error { + if num, ok := data.(int64); ok { + if rv.Kind() >= reflect.Int && rv.Kind() <= reflect.Int64 { + switch rv.Kind() { + case reflect.Int, reflect.Int64: + // No bounds checking necessary. + case reflect.Int8: + if num < math.MinInt8 || num > math.MaxInt8 { + return e("value %d is out of range for int8", num) + } + case reflect.Int16: + if num < math.MinInt16 || num > math.MaxInt16 { + return e("value %d is out of range for int16", num) + } + case reflect.Int32: + if num < math.MinInt32 || num > math.MaxInt32 { + return e("value %d is out of range for int32", num) + } + } + rv.SetInt(num) + } else if rv.Kind() >= reflect.Uint && rv.Kind() <= reflect.Uint64 { + unum := uint64(num) + switch rv.Kind() { + case reflect.Uint, reflect.Uint64: + // No bounds checking necessary. 
+ case reflect.Uint8: + if num < 0 || unum > math.MaxUint8 { + return e("value %d is out of range for uint8", num) + } + case reflect.Uint16: + if num < 0 || unum > math.MaxUint16 { + return e("value %d is out of range for uint16", num) + } + case reflect.Uint32: + if num < 0 || unum > math.MaxUint32 { + return e("value %d is out of range for uint32", num) + } + } + rv.SetUint(unum) + } else { + panic("unreachable") + } + return nil + } + return badtype("integer", data) +} + +func (md *MetaData) unifyBool(data interface{}, rv reflect.Value) error { + if b, ok := data.(bool); ok { + rv.SetBool(b) + return nil + } + return badtype("boolean", data) +} + +func (md *MetaData) unifyAnything(data interface{}, rv reflect.Value) error { + rv.Set(reflect.ValueOf(data)) + return nil +} + +func (md *MetaData) unifyText(data interface{}, v TextUnmarshaler) error { + var s string + switch sdata := data.(type) { + case TextMarshaler: + text, err := sdata.MarshalText() + if err != nil { + return err + } + s = string(text) + case fmt.Stringer: + s = sdata.String() + case string: + s = sdata + case bool: + s = fmt.Sprintf("%v", sdata) + case int64: + s = fmt.Sprintf("%d", sdata) + case float64: + s = fmt.Sprintf("%f", sdata) + default: + return badtype("primitive (string-like)", data) + } + if err := v.UnmarshalText([]byte(s)); err != nil { + return err + } + return nil +} + +// rvalue returns a reflect.Value of `v`. All pointers are resolved. +func rvalue(v interface{}) reflect.Value { + return indirect(reflect.ValueOf(v)) +} + +// indirect returns the value pointed to by a pointer. +// Pointers are followed until the value is not a pointer. +// New values are allocated for each nil pointer. +// +// An exception to this rule is if the value satisfies an interface of +// interest to us (like encoding.TextUnmarshaler). 
+func indirect(v reflect.Value) reflect.Value { + if v.Kind() != reflect.Ptr { + if v.CanSet() { + pv := v.Addr() + if _, ok := pv.Interface().(TextUnmarshaler); ok { + return pv + } + } + return v + } + if v.IsNil() { + v.Set(reflect.New(v.Type().Elem())) + } + return indirect(reflect.Indirect(v)) +} + +func isUnifiable(rv reflect.Value) bool { + if rv.CanSet() { + return true + } + if _, ok := rv.Interface().(TextUnmarshaler); ok { + return true + } + return false +} + +func badtype(expected string, data interface{}) error { + return e("cannot load TOML value of type %T into a Go %s", data, expected) +} diff --git a/vendor/github.com/BurntSushi/toml/decode_meta.go b/vendor/github.com/BurntSushi/toml/decode_meta.go new file mode 100644 index 0000000000..b9914a6798 --- /dev/null +++ b/vendor/github.com/BurntSushi/toml/decode_meta.go @@ -0,0 +1,121 @@ +package toml + +import "strings" + +// MetaData allows access to meta information about TOML data that may not +// be inferrable via reflection. In particular, whether a key has been defined +// and the TOML type of a key. +type MetaData struct { + mapping map[string]interface{} + types map[string]tomlType + keys []Key + decoded map[string]bool + context Key // Used only during decoding. +} + +// IsDefined returns true if the key given exists in the TOML data. The key +// should be specified hierarchially. e.g., +// +// // access the TOML key 'a.b.c' +// IsDefined("a", "b", "c") +// +// IsDefined will return false if an empty key given. Keys are case sensitive. +func (md *MetaData) IsDefined(key ...string) bool { + if len(key) == 0 { + return false + } + + var hash map[string]interface{} + var ok bool + var hashOrVal interface{} = md.mapping + for _, k := range key { + if hash, ok = hashOrVal.(map[string]interface{}); !ok { + return false + } + if hashOrVal, ok = hash[k]; !ok { + return false + } + } + return true +} + +// Type returns a string representation of the type of the key specified. 
+// +// Type will return the empty string if given an empty key or a key that +// does not exist. Keys are case sensitive. +func (md *MetaData) Type(key ...string) string { + fullkey := strings.Join(key, ".") + if typ, ok := md.types[fullkey]; ok { + return typ.typeString() + } + return "" +} + +// Key is the type of any TOML key, including key groups. Use (MetaData).Keys +// to get values of this type. +type Key []string + +func (k Key) String() string { + return strings.Join(k, ".") +} + +func (k Key) maybeQuotedAll() string { + var ss []string + for i := range k { + ss = append(ss, k.maybeQuoted(i)) + } + return strings.Join(ss, ".") +} + +func (k Key) maybeQuoted(i int) string { + quote := false + for _, c := range k[i] { + if !isBareKeyChar(c) { + quote = true + break + } + } + if quote { + return "\"" + strings.Replace(k[i], "\"", "\\\"", -1) + "\"" + } + return k[i] +} + +func (k Key) add(piece string) Key { + newKey := make(Key, len(k)+1) + copy(newKey, k) + newKey[len(k)] = piece + return newKey +} + +// Keys returns a slice of every key in the TOML data, including key groups. +// Each key is itself a slice, where the first element is the top of the +// hierarchy and the last is the most specific. +// +// The list will have the same order as the keys appeared in the TOML data. +// +// All keys returned are non-empty. +func (md *MetaData) Keys() []Key { + return md.keys +} + +// Undecoded returns all keys that have not been decoded in the order in which +// they appear in the original TOML document. +// +// This includes keys that haven't been decoded because of a Primitive value. +// Once the Primitive value is decoded, the keys will be considered decoded. +// +// Also note that decoding into an empty interface will result in no decoding, +// and so no keys will be considered decoded. +// +// In this sense, the Undecoded keys correspond to keys in the TOML document +// that do not have a concrete type in your representation. 
+func (md *MetaData) Undecoded() []Key { + undecoded := make([]Key, 0, len(md.keys)) + for _, key := range md.keys { + if !md.decoded[key.String()] { + undecoded = append(undecoded, key) + } + } + return undecoded +} diff --git a/vendor/github.com/BurntSushi/toml/doc.go b/vendor/github.com/BurntSushi/toml/doc.go new file mode 100644 index 0000000000..b371f396ed --- /dev/null +++ b/vendor/github.com/BurntSushi/toml/doc.go @@ -0,0 +1,27 @@ +/* +Package toml provides facilities for decoding and encoding TOML configuration +files via reflection. There is also support for delaying decoding with +the Primitive type, and querying the set of keys in a TOML document with the +MetaData type. + +The specification implemented: https://github.com/toml-lang/toml + +The sub-command github.com/BurntSushi/toml/cmd/tomlv can be used to verify +whether a file is a valid TOML document. It can also be used to print the +type of each key in a TOML document. + +Testing + +There are two important types of tests used for this package. The first is +contained inside '*_test.go' files and uses the standard Go unit testing +framework. These tests are primarily devoted to holistically testing the +decoder and encoder. + +The second type of testing is used to verify the implementation's adherence +to the TOML specification. These tests have been factored into their own +project: https://github.com/BurntSushi/toml-test + +The reason the tests are in a separate project is so that they can be used by +any implementation of TOML. Namely, it is language agnostic. 
+*/ +package toml diff --git a/vendor/github.com/BurntSushi/toml/encode.go b/vendor/github.com/BurntSushi/toml/encode.go new file mode 100644 index 0000000000..d905c21a24 --- /dev/null +++ b/vendor/github.com/BurntSushi/toml/encode.go @@ -0,0 +1,568 @@ +package toml + +import ( + "bufio" + "errors" + "fmt" + "io" + "reflect" + "sort" + "strconv" + "strings" + "time" +) + +type tomlEncodeError struct{ error } + +var ( + errArrayMixedElementTypes = errors.New( + "toml: cannot encode array with mixed element types") + errArrayNilElement = errors.New( + "toml: cannot encode array with nil element") + errNonString = errors.New( + "toml: cannot encode a map with non-string key type") + errAnonNonStruct = errors.New( + "toml: cannot encode an anonymous field that is not a struct") + errArrayNoTable = errors.New( + "toml: TOML array element cannot contain a table") + errNoKey = errors.New( + "toml: top-level values must be Go maps or structs") + errAnything = errors.New("") // used in testing +) + +var quotedReplacer = strings.NewReplacer( + "\t", "\\t", + "\n", "\\n", + "\r", "\\r", + "\"", "\\\"", + "\\", "\\\\", +) + +// Encoder controls the encoding of Go values to a TOML document to some +// io.Writer. +// +// The indentation level can be controlled with the Indent field. +type Encoder struct { + // A single indentation level. By default it is two spaces. + Indent string + + // hasWritten is whether we have written any output to w yet. + hasWritten bool + w *bufio.Writer +} + +// NewEncoder returns a TOML encoder that encodes Go values to the io.Writer +// given. By default, a single indentation level is 2 spaces. +func NewEncoder(w io.Writer) *Encoder { + return &Encoder{ + w: bufio.NewWriter(w), + Indent: " ", + } +} + +// Encode writes a TOML representation of the Go value to the underlying +// io.Writer. If the value given cannot be encoded to a valid TOML document, +// then an error is returned. 
+// +// The mapping between Go values and TOML values should be precisely the same +// as for the Decode* functions. Similarly, the TextMarshaler interface is +// supported by encoding the resulting bytes as strings. (If you want to write +// arbitrary binary data then you will need to use something like base64 since +// TOML does not have any binary types.) +// +// When encoding TOML hashes (i.e., Go maps or structs), keys without any +// sub-hashes are encoded first. +// +// If a Go map is encoded, then its keys are sorted alphabetically for +// deterministic output. More control over this behavior may be provided if +// there is demand for it. +// +// Encoding Go values without a corresponding TOML representation---like map +// types with non-string keys---will cause an error to be returned. Similarly +// for mixed arrays/slices, arrays/slices with nil elements, embedded +// non-struct types and nested slices containing maps or structs. +// (e.g., [][]map[string]string is not allowed but []map[string]string is OK +// and so is []map[string][]string.) +func (enc *Encoder) Encode(v interface{}) error { + rv := eindirect(reflect.ValueOf(v)) + if err := enc.safeEncode(Key([]string{}), rv); err != nil { + return err + } + return enc.w.Flush() +} + +func (enc *Encoder) safeEncode(key Key, rv reflect.Value) (err error) { + defer func() { + if r := recover(); r != nil { + if terr, ok := r.(tomlEncodeError); ok { + err = terr.error + return + } + panic(r) + } + }() + enc.encode(key, rv) + return nil +} + +func (enc *Encoder) encode(key Key, rv reflect.Value) { + // Special case. Time needs to be in ISO8601 format. + // Special case. If we can marshal the type to text, then we used that. + // Basically, this prevents the encoder for handling these types as + // generic structs (or whatever the underlying type of a TextMarshaler is). 
+ switch rv.Interface().(type) { + case time.Time, TextMarshaler: + enc.keyEqElement(key, rv) + return + } + + k := rv.Kind() + switch k { + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, + reflect.Int64, + reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, + reflect.Uint64, + reflect.Float32, reflect.Float64, reflect.String, reflect.Bool: + enc.keyEqElement(key, rv) + case reflect.Array, reflect.Slice: + if typeEqual(tomlArrayHash, tomlTypeOfGo(rv)) { + enc.eArrayOfTables(key, rv) + } else { + enc.keyEqElement(key, rv) + } + case reflect.Interface: + if rv.IsNil() { + return + } + enc.encode(key, rv.Elem()) + case reflect.Map: + if rv.IsNil() { + return + } + enc.eTable(key, rv) + case reflect.Ptr: + if rv.IsNil() { + return + } + enc.encode(key, rv.Elem()) + case reflect.Struct: + enc.eTable(key, rv) + default: + panic(e("unsupported type for key '%s': %s", key, k)) + } +} + +// eElement encodes any value that can be an array element (primitives and +// arrays). +func (enc *Encoder) eElement(rv reflect.Value) { + switch v := rv.Interface().(type) { + case time.Time: + // Special case time.Time as a primitive. Has to come before + // TextMarshaler below because time.Time implements + // encoding.TextMarshaler, but we need to always use UTC. + enc.wf(v.UTC().Format("2006-01-02T15:04:05Z")) + return + case TextMarshaler: + // Special case. Use text marshaler if it's available for this value. 
+ if s, err := v.MarshalText(); err != nil { + encPanic(err) + } else { + enc.writeQuoted(string(s)) + } + return + } + switch rv.Kind() { + case reflect.Bool: + enc.wf(strconv.FormatBool(rv.Bool())) + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, + reflect.Int64: + enc.wf(strconv.FormatInt(rv.Int(), 10)) + case reflect.Uint, reflect.Uint8, reflect.Uint16, + reflect.Uint32, reflect.Uint64: + enc.wf(strconv.FormatUint(rv.Uint(), 10)) + case reflect.Float32: + enc.wf(floatAddDecimal(strconv.FormatFloat(rv.Float(), 'f', -1, 32))) + case reflect.Float64: + enc.wf(floatAddDecimal(strconv.FormatFloat(rv.Float(), 'f', -1, 64))) + case reflect.Array, reflect.Slice: + enc.eArrayOrSliceElement(rv) + case reflect.Interface: + enc.eElement(rv.Elem()) + case reflect.String: + enc.writeQuoted(rv.String()) + default: + panic(e("unexpected primitive type: %s", rv.Kind())) + } +} + +// By the TOML spec, all floats must have a decimal with at least one +// number on either side. +func floatAddDecimal(fstr string) string { + if !strings.Contains(fstr, ".") { + return fstr + ".0" + } + return fstr +} + +func (enc *Encoder) writeQuoted(s string) { + enc.wf("\"%s\"", quotedReplacer.Replace(s)) +} + +func (enc *Encoder) eArrayOrSliceElement(rv reflect.Value) { + length := rv.Len() + enc.wf("[") + for i := 0; i < length; i++ { + elem := rv.Index(i) + enc.eElement(elem) + if i != length-1 { + enc.wf(", ") + } + } + enc.wf("]") +} + +func (enc *Encoder) eArrayOfTables(key Key, rv reflect.Value) { + if len(key) == 0 { + encPanic(errNoKey) + } + for i := 0; i < rv.Len(); i++ { + trv := rv.Index(i) + if isNil(trv) { + continue + } + panicIfInvalidKey(key) + enc.newline() + enc.wf("%s[[%s]]", enc.indentStr(key), key.maybeQuotedAll()) + enc.newline() + enc.eMapOrStruct(key, trv) + } +} + +func (enc *Encoder) eTable(key Key, rv reflect.Value) { + panicIfInvalidKey(key) + if len(key) == 1 { + // Output an extra newline between top-level tables. 
+ // (The newline isn't written if nothing else has been written though.) + enc.newline() + } + if len(key) > 0 { + enc.wf("%s[%s]", enc.indentStr(key), key.maybeQuotedAll()) + enc.newline() + } + enc.eMapOrStruct(key, rv) +} + +func (enc *Encoder) eMapOrStruct(key Key, rv reflect.Value) { + switch rv := eindirect(rv); rv.Kind() { + case reflect.Map: + enc.eMap(key, rv) + case reflect.Struct: + enc.eStruct(key, rv) + default: + panic("eTable: unhandled reflect.Value Kind: " + rv.Kind().String()) + } +} + +func (enc *Encoder) eMap(key Key, rv reflect.Value) { + rt := rv.Type() + if rt.Key().Kind() != reflect.String { + encPanic(errNonString) + } + + // Sort keys so that we have deterministic output. And write keys directly + // underneath this key first, before writing sub-structs or sub-maps. + var mapKeysDirect, mapKeysSub []string + for _, mapKey := range rv.MapKeys() { + k := mapKey.String() + if typeIsHash(tomlTypeOfGo(rv.MapIndex(mapKey))) { + mapKeysSub = append(mapKeysSub, k) + } else { + mapKeysDirect = append(mapKeysDirect, k) + } + } + + var writeMapKeys = func(mapKeys []string) { + sort.Strings(mapKeys) + for _, mapKey := range mapKeys { + mrv := rv.MapIndex(reflect.ValueOf(mapKey)) + if isNil(mrv) { + // Don't write anything for nil fields. + continue + } + enc.encode(key.add(mapKey), mrv) + } + } + writeMapKeys(mapKeysDirect) + writeMapKeys(mapKeysSub) +} + +func (enc *Encoder) eStruct(key Key, rv reflect.Value) { + // Write keys for fields directly under this key first, because if we write + // a field that creates a new table, then all keys under it will be in that + // table (not the one we're writing here). 
+ rt := rv.Type() + var fieldsDirect, fieldsSub [][]int + var addFields func(rt reflect.Type, rv reflect.Value, start []int) + addFields = func(rt reflect.Type, rv reflect.Value, start []int) { + for i := 0; i < rt.NumField(); i++ { + f := rt.Field(i) + // skip unexported fields + if f.PkgPath != "" && !f.Anonymous { + continue + } + frv := rv.Field(i) + if f.Anonymous { + t := f.Type + switch t.Kind() { + case reflect.Struct: + // Treat anonymous struct fields with + // tag names as though they are not + // anonymous, like encoding/json does. + if getOptions(f.Tag).name == "" { + addFields(t, frv, f.Index) + continue + } + case reflect.Ptr: + if t.Elem().Kind() == reflect.Struct && + getOptions(f.Tag).name == "" { + if !frv.IsNil() { + addFields(t.Elem(), frv.Elem(), f.Index) + } + continue + } + // Fall through to the normal field encoding logic below + // for non-struct anonymous fields. + } + } + + if typeIsHash(tomlTypeOfGo(frv)) { + fieldsSub = append(fieldsSub, append(start, f.Index...)) + } else { + fieldsDirect = append(fieldsDirect, append(start, f.Index...)) + } + } + } + addFields(rt, rv, nil) + + var writeFields = func(fields [][]int) { + for _, fieldIndex := range fields { + sft := rt.FieldByIndex(fieldIndex) + sf := rv.FieldByIndex(fieldIndex) + if isNil(sf) { + // Don't write anything for nil fields. + continue + } + + opts := getOptions(sft.Tag) + if opts.skip { + continue + } + keyName := sft.Name + if opts.name != "" { + keyName = opts.name + } + if opts.omitempty && isEmpty(sf) { + continue + } + if opts.omitzero && isZero(sf) { + continue + } + + enc.encode(key.add(keyName), sf) + } + } + writeFields(fieldsDirect) + writeFields(fieldsSub) +} + +// tomlTypeName returns the TOML type name of the Go value's type. It is +// used to determine whether the types of array elements are mixed (which is +// forbidden). If the Go value is nil, then it is illegal for it to be an array +// element, and valueIsNil is returned as true. 
+ +// Returns the TOML type of a Go value. The type may be `nil`, which means +// no concrete TOML type could be found. +func tomlTypeOfGo(rv reflect.Value) tomlType { + if isNil(rv) || !rv.IsValid() { + return nil + } + switch rv.Kind() { + case reflect.Bool: + return tomlBool + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, + reflect.Int64, + reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, + reflect.Uint64: + return tomlInteger + case reflect.Float32, reflect.Float64: + return tomlFloat + case reflect.Array, reflect.Slice: + if typeEqual(tomlHash, tomlArrayType(rv)) { + return tomlArrayHash + } + return tomlArray + case reflect.Ptr, reflect.Interface: + return tomlTypeOfGo(rv.Elem()) + case reflect.String: + return tomlString + case reflect.Map: + return tomlHash + case reflect.Struct: + switch rv.Interface().(type) { + case time.Time: + return tomlDatetime + case TextMarshaler: + return tomlString + default: + return tomlHash + } + default: + panic("unexpected reflect.Kind: " + rv.Kind().String()) + } +} + +// tomlArrayType returns the element type of a TOML array. The type returned +// may be nil if it cannot be determined (e.g., a nil slice or a zero length +// slize). This function may also panic if it finds a type that cannot be +// expressed in TOML (such as nil elements, heterogeneous arrays or directly +// nested arrays of tables). +func tomlArrayType(rv reflect.Value) tomlType { + if isNil(rv) || !rv.IsValid() || rv.Len() == 0 { + return nil + } + firstType := tomlTypeOfGo(rv.Index(0)) + if firstType == nil { + encPanic(errArrayNilElement) + } + + rvlen := rv.Len() + for i := 1; i < rvlen; i++ { + elem := rv.Index(i) + switch elemType := tomlTypeOfGo(elem); { + case elemType == nil: + encPanic(errArrayNilElement) + case !typeEqual(firstType, elemType): + encPanic(errArrayMixedElementTypes) + } + } + // If we have a nested array, then we must make sure that the nested + // array contains ONLY primitives. 
+ // This checks arbitrarily nested arrays. + if typeEqual(firstType, tomlArray) || typeEqual(firstType, tomlArrayHash) { + nest := tomlArrayType(eindirect(rv.Index(0))) + if typeEqual(nest, tomlHash) || typeEqual(nest, tomlArrayHash) { + encPanic(errArrayNoTable) + } + } + return firstType +} + +type tagOptions struct { + skip bool // "-" + name string + omitempty bool + omitzero bool +} + +func getOptions(tag reflect.StructTag) tagOptions { + t := tag.Get("toml") + if t == "-" { + return tagOptions{skip: true} + } + var opts tagOptions + parts := strings.Split(t, ",") + opts.name = parts[0] + for _, s := range parts[1:] { + switch s { + case "omitempty": + opts.omitempty = true + case "omitzero": + opts.omitzero = true + } + } + return opts +} + +func isZero(rv reflect.Value) bool { + switch rv.Kind() { + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + return rv.Int() == 0 + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: + return rv.Uint() == 0 + case reflect.Float32, reflect.Float64: + return rv.Float() == 0.0 + } + return false +} + +func isEmpty(rv reflect.Value) bool { + switch rv.Kind() { + case reflect.Array, reflect.Slice, reflect.Map, reflect.String: + return rv.Len() == 0 + case reflect.Bool: + return !rv.Bool() + } + return false +} + +func (enc *Encoder) newline() { + if enc.hasWritten { + enc.wf("\n") + } +} + +func (enc *Encoder) keyEqElement(key Key, val reflect.Value) { + if len(key) == 0 { + encPanic(errNoKey) + } + panicIfInvalidKey(key) + enc.wf("%s%s = ", enc.indentStr(key), key.maybeQuoted(len(key)-1)) + enc.eElement(val) + enc.newline() +} + +func (enc *Encoder) wf(format string, v ...interface{}) { + if _, err := fmt.Fprintf(enc.w, format, v...); err != nil { + encPanic(err) + } + enc.hasWritten = true +} + +func (enc *Encoder) indentStr(key Key) string { + return strings.Repeat(enc.Indent, len(key)-1) +} + +func encPanic(err error) { + panic(tomlEncodeError{err}) +} + +func 
eindirect(v reflect.Value) reflect.Value { + switch v.Kind() { + case reflect.Ptr, reflect.Interface: + return eindirect(v.Elem()) + default: + return v + } +} + +func isNil(rv reflect.Value) bool { + switch rv.Kind() { + case reflect.Interface, reflect.Map, reflect.Ptr, reflect.Slice: + return rv.IsNil() + default: + return false + } +} + +func panicIfInvalidKey(key Key) { + for _, k := range key { + if len(k) == 0 { + encPanic(e("Key '%s' is not a valid table name. Key names "+ + "cannot be empty.", key.maybeQuotedAll())) + } + } +} + +func isValidKeyName(s string) bool { + return len(s) != 0 +} diff --git a/vendor/github.com/BurntSushi/toml/encoding_types.go b/vendor/github.com/BurntSushi/toml/encoding_types.go new file mode 100644 index 0000000000..d36e1dd600 --- /dev/null +++ b/vendor/github.com/BurntSushi/toml/encoding_types.go @@ -0,0 +1,19 @@ +// +build go1.2 + +package toml + +// In order to support Go 1.1, we define our own TextMarshaler and +// TextUnmarshaler types. For Go 1.2+, we just alias them with the +// standard library interfaces. + +import ( + "encoding" +) + +// TextMarshaler is a synonym for encoding.TextMarshaler. It is defined here +// so that Go 1.1 can be supported. +type TextMarshaler encoding.TextMarshaler + +// TextUnmarshaler is a synonym for encoding.TextUnmarshaler. It is defined +// here so that Go 1.1 can be supported. +type TextUnmarshaler encoding.TextUnmarshaler diff --git a/vendor/github.com/BurntSushi/toml/encoding_types_1.1.go b/vendor/github.com/BurntSushi/toml/encoding_types_1.1.go new file mode 100644 index 0000000000..e8d503d046 --- /dev/null +++ b/vendor/github.com/BurntSushi/toml/encoding_types_1.1.go @@ -0,0 +1,18 @@ +// +build !go1.2 + +package toml + +// These interfaces were introduced in Go 1.2, so we add them manually when +// compiling for Go 1.1. + +// TextMarshaler is a synonym for encoding.TextMarshaler. It is defined here +// so that Go 1.1 can be supported. 
+type TextMarshaler interface { + MarshalText() (text []byte, err error) +} + +// TextUnmarshaler is a synonym for encoding.TextUnmarshaler. It is defined +// here so that Go 1.1 can be supported. +type TextUnmarshaler interface { + UnmarshalText(text []byte) error +} diff --git a/vendor/github.com/BurntSushi/toml/lex.go b/vendor/github.com/BurntSushi/toml/lex.go new file mode 100644 index 0000000000..e0a742a887 --- /dev/null +++ b/vendor/github.com/BurntSushi/toml/lex.go @@ -0,0 +1,953 @@ +package toml + +import ( + "fmt" + "strings" + "unicode" + "unicode/utf8" +) + +type itemType int + +const ( + itemError itemType = iota + itemNIL // used in the parser to indicate no type + itemEOF + itemText + itemString + itemRawString + itemMultilineString + itemRawMultilineString + itemBool + itemInteger + itemFloat + itemDatetime + itemArray // the start of an array + itemArrayEnd + itemTableStart + itemTableEnd + itemArrayTableStart + itemArrayTableEnd + itemKeyStart + itemCommentStart + itemInlineTableStart + itemInlineTableEnd +) + +const ( + eof = 0 + comma = ',' + tableStart = '[' + tableEnd = ']' + arrayTableStart = '[' + arrayTableEnd = ']' + tableSep = '.' + keySep = '=' + arrayStart = '[' + arrayEnd = ']' + commentStart = '#' + stringStart = '"' + stringEnd = '"' + rawStringStart = '\'' + rawStringEnd = '\'' + inlineTableStart = '{' + inlineTableEnd = '}' +) + +type stateFn func(lx *lexer) stateFn + +type lexer struct { + input string + start int + pos int + line int + state stateFn + items chan item + + // Allow for backing up up to three runes. + // This is necessary because TOML contains 3-rune tokens (""" and '''). + prevWidths [3]int + nprev int // how many of prevWidths are in use + // If we emit an eof, we can still back up, but it is not OK to call + // next again. + atEOF bool + + // A stack of state functions used to maintain context. + // The idea is to reuse parts of the state machine in various places. 
+ // For example, values can appear at the top level or within arbitrarily + // nested arrays. The last state on the stack is used after a value has + // been lexed. Similarly for comments. + stack []stateFn +} + +type item struct { + typ itemType + val string + line int +} + +func (lx *lexer) nextItem() item { + for { + select { + case item := <-lx.items: + return item + default: + lx.state = lx.state(lx) + } + } +} + +func lex(input string) *lexer { + lx := &lexer{ + input: input, + state: lexTop, + line: 1, + items: make(chan item, 10), + stack: make([]stateFn, 0, 10), + } + return lx +} + +func (lx *lexer) push(state stateFn) { + lx.stack = append(lx.stack, state) +} + +func (lx *lexer) pop() stateFn { + if len(lx.stack) == 0 { + return lx.errorf("BUG in lexer: no states to pop") + } + last := lx.stack[len(lx.stack)-1] + lx.stack = lx.stack[0 : len(lx.stack)-1] + return last +} + +func (lx *lexer) current() string { + return lx.input[lx.start:lx.pos] +} + +func (lx *lexer) emit(typ itemType) { + lx.items <- item{typ, lx.current(), lx.line} + lx.start = lx.pos +} + +func (lx *lexer) emitTrim(typ itemType) { + lx.items <- item{typ, strings.TrimSpace(lx.current()), lx.line} + lx.start = lx.pos +} + +func (lx *lexer) next() (r rune) { + if lx.atEOF { + panic("next called after EOF") + } + if lx.pos >= len(lx.input) { + lx.atEOF = true + return eof + } + + if lx.input[lx.pos] == '\n' { + lx.line++ + } + lx.prevWidths[2] = lx.prevWidths[1] + lx.prevWidths[1] = lx.prevWidths[0] + if lx.nprev < 3 { + lx.nprev++ + } + r, w := utf8.DecodeRuneInString(lx.input[lx.pos:]) + lx.prevWidths[0] = w + lx.pos += w + return r +} + +// ignore skips over the pending input before this point. +func (lx *lexer) ignore() { + lx.start = lx.pos +} + +// backup steps back one rune. Can be called only twice between calls to next. 
func (lx *lexer) backup() {
	// An EOF returned by next did not advance pos; undoing it only needs the
	// flag cleared.
	if lx.atEOF {
		lx.atEOF = false
		return
	}
	if lx.nprev < 1 {
		panic("backed up too far")
	}
	// Pop the most recent rune width off the three-slot history. NOTE: with
	// three slots, up to three consecutive backups are possible; the
	// multi-line string states back up three times in a row.
	w := lx.prevWidths[0]
	lx.prevWidths[0] = lx.prevWidths[1]
	lx.prevWidths[1] = lx.prevWidths[2]
	lx.nprev--
	lx.pos -= w
	// next incremented the line counter when it consumed this newline, so
	// undo that as well.
	if lx.pos < len(lx.input) && lx.input[lx.pos] == '\n' {
		lx.line--
	}
}

// accept consumes the next rune if it's equal to `valid`.
// It reports whether the rune was consumed; on a mismatch the rune is put
// back with backup.
func (lx *lexer) accept(valid rune) bool {
	if lx.next() == valid {
		return true
	}
	lx.backup()
	return false
}

// peek returns but does not consume the next rune in the input.
func (lx *lexer) peek() rune {
	r := lx.next()
	lx.backup()
	return r
}

// skip ignores all input that matches the given predicate.
// The first rune that does not match is put back, and everything consumed up
// to that point is dropped from the pending token.
func (lx *lexer) skip(pred func(rune) bool) {
	for {
		r := lx.next()
		if pred(r) {
			continue
		}
		lx.backup()
		lx.ignore()
		return
	}
}

// errorf stops all lexing by emitting an error and returning `nil`.
// Note that any value that is a character is escaped if it's a special
// character (newlines, tabs, etc.).
//
// The formatted message is sent on the items channel as an itemError tagged
// with the current line.
func (lx *lexer) errorf(format string, values ...interface{}) stateFn {
	lx.items <- item{
		itemError,
		fmt.Sprintf(format, values...),
		lx.line,
	}
	return nil
}

// lexTop consumes elements at the top level of TOML data.
// It dispatches on the first significant rune: a comment, a table header,
// EOF, or otherwise the start of a key.
func lexTop(lx *lexer) stateFn {
	r := lx.next()
	if isWhitespace(r) || isNL(r) {
		return lexSkip(lx, lexTop)
	}
	switch r {
	case commentStart:
		// Come back to the top level once the comment has been lexed.
		lx.push(lexTop)
		return lexCommentStart
	case tableStart:
		return lexTableStart
	case eof:
		// pos > start means input was consumed since the last emit/ignore
		// and is still pending.
		if lx.pos > lx.start {
			return lx.errorf("unexpected EOF")
		}
		lx.emit(itemEOF)
		return nil
	}

	// At this point, the only valid item can be a key, so we back up
	// and let the key lexer do the rest.
	lx.backup()
	lx.push(lexTopEnd)
	return lexKeyStart
}

// lexTopEnd is entered whenever a top-level item has been consumed. (A value
// or a table.) It must see only whitespace, and will turn back to lexTop
// upon a newline. If it sees EOF, it will quit the lexer successfully.
func lexTopEnd(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case r == commentStart:
		// a comment will read to a newline for us.
		lx.push(lexTop)
		return lexCommentStart
	case isWhitespace(r):
		return lexTopEnd
	case isNL(r):
		lx.ignore()
		return lexTop
	case r == eof:
		lx.emit(itemEOF)
		return nil
	}
	return lx.errorf("expected a top-level item to end with a newline, "+
		"comment, or EOF, but got %q instead", r)
}

// lexTableStart lexes the beginning of a table. Namely, it makes sure that
// it starts with a character other than '.' and ']'.
// It assumes that '[' has already been consumed.
// It also handles the case that this is an item in an array of tables.
// e.g., '[[name]]'.
//
// The matching end state (lexTableEnd or lexArrayTableEnd) is pushed on the
// stack so that it runs once the name has been lexed.
func lexTableStart(lx *lexer) stateFn {
	if lx.peek() == arrayTableStart {
		lx.next()
		lx.emit(itemArrayTableStart)
		lx.push(lexArrayTableEnd)
	} else {
		lx.emit(itemTableStart)
		lx.push(lexTableEnd)
	}
	return lexTableNameStart
}

// lexTableEnd emits the end of a '[name]' header and continues with
// lexTopEnd. It is pushed by lexTableStart and popped by lexTableNameEnd
// after the closing ']' has been consumed.
func lexTableEnd(lx *lexer) stateFn {
	lx.emit(itemTableEnd)
	return lexTopEnd
}

// lexArrayTableEnd finishes a '[[name]]' header. The first ']' has already
// been consumed by lexTableNameEnd; the next rune must be the second ']'.
func lexArrayTableEnd(lx *lexer) stateFn {
	if r := lx.next(); r != arrayTableEnd {
		return lx.errorf("expected end of table array name delimiter %q, "+
			"but got %q instead", arrayTableEnd, r)
	}
	lx.emit(itemArrayTableEnd)
	return lexTopEnd
}

// lexTableNameStart lexes the start of one piece of a (possibly dotted)
// table name. Leading whitespace is skipped. An empty piece is an error, a
// quoted piece is handed to the string lexer via lexValue, and anything else
// is lexed as a bare name.
func lexTableNameStart(lx *lexer) stateFn {
	lx.skip(isWhitespace)
	switch r := lx.peek(); {
	case r == tableEnd || r == eof:
		return lx.errorf("unexpected end of table name " +
			"(table names cannot be empty)")
	case r == tableSep:
		return lx.errorf("unexpected table separator " +
			"(table names cannot be empty)")
	case r == stringStart || r == rawStringStart:
		lx.ignore()
		lx.push(lexTableNameEnd)
		return lexValue // reuse string lexing
	default:
		return lexBareTableName
	}
}

// lexBareTableName lexes the name of a table. It assumes that at least one
// valid character for the table has already been read.
+func lexBareTableName(lx *lexer) stateFn { + r := lx.next() + if isBareKeyChar(r) { + return lexBareTableName + } + lx.backup() + lx.emit(itemText) + return lexTableNameEnd +} + +// lexTableNameEnd reads the end of a piece of a table name, optionally +// consuming whitespace. +func lexTableNameEnd(lx *lexer) stateFn { + lx.skip(isWhitespace) + switch r := lx.next(); { + case isWhitespace(r): + return lexTableNameEnd + case r == tableSep: + lx.ignore() + return lexTableNameStart + case r == tableEnd: + return lx.pop() + default: + return lx.errorf("expected '.' or ']' to end table name, "+ + "but got %q instead", r) + } +} + +// lexKeyStart consumes a key name up until the first non-whitespace character. +// lexKeyStart will ignore whitespace. +func lexKeyStart(lx *lexer) stateFn { + r := lx.peek() + switch { + case r == keySep: + return lx.errorf("unexpected key separator %q", keySep) + case isWhitespace(r) || isNL(r): + lx.next() + return lexSkip(lx, lexKeyStart) + case r == stringStart || r == rawStringStart: + lx.ignore() + lx.emit(itemKeyStart) + lx.push(lexKeyEnd) + return lexValue // reuse string lexing + default: + lx.ignore() + lx.emit(itemKeyStart) + return lexBareKey + } +} + +// lexBareKey consumes the text of a bare key. Assumes that the first character +// (which is not whitespace) has not yet been consumed. +func lexBareKey(lx *lexer) stateFn { + switch r := lx.next(); { + case isBareKeyChar(r): + return lexBareKey + case isWhitespace(r): + lx.backup() + lx.emit(itemText) + return lexKeyEnd + case r == keySep: + lx.backup() + lx.emit(itemText) + return lexKeyEnd + default: + return lx.errorf("bare keys cannot contain %q", r) + } +} + +// lexKeyEnd consumes the end of a key and trims whitespace (up to the key +// separator). 
+func lexKeyEnd(lx *lexer) stateFn { + switch r := lx.next(); { + case r == keySep: + return lexSkip(lx, lexValue) + case isWhitespace(r): + return lexSkip(lx, lexKeyEnd) + default: + return lx.errorf("expected key separator %q, but got %q instead", + keySep, r) + } +} + +// lexValue starts the consumption of a value anywhere a value is expected. +// lexValue will ignore whitespace. +// After a value is lexed, the last state on the next is popped and returned. +func lexValue(lx *lexer) stateFn { + // We allow whitespace to precede a value, but NOT newlines. + // In array syntax, the array states are responsible for ignoring newlines. + r := lx.next() + switch { + case isWhitespace(r): + return lexSkip(lx, lexValue) + case isDigit(r): + lx.backup() // avoid an extra state and use the same as above + return lexNumberOrDateStart + } + switch r { + case arrayStart: + lx.ignore() + lx.emit(itemArray) + return lexArrayValue + case inlineTableStart: + lx.ignore() + lx.emit(itemInlineTableStart) + return lexInlineTableValue + case stringStart: + if lx.accept(stringStart) { + if lx.accept(stringStart) { + lx.ignore() // Ignore """ + return lexMultilineString + } + lx.backup() + } + lx.ignore() // ignore the '"' + return lexString + case rawStringStart: + if lx.accept(rawStringStart) { + if lx.accept(rawStringStart) { + lx.ignore() // Ignore """ + return lexMultilineRawString + } + lx.backup() + } + lx.ignore() // ignore the "'" + return lexRawString + case '+', '-': + return lexNumberStart + case '.': // special error case, be kind to users + return lx.errorf("floats must start with a digit, not '.'") + } + if unicode.IsLetter(r) { + // Be permissive here; lexBool will give a nice error if the + // user wrote something like + // x = foo + // (i.e. not 'true' or 'false' but is something else word-like.) + lx.backup() + return lexBool + } + return lx.errorf("expected value but found %q instead", r) +} + +// lexArrayValue consumes one value in an array. 
It assumes that '[' or ',' +// have already been consumed. All whitespace and newlines are ignored. +func lexArrayValue(lx *lexer) stateFn { + r := lx.next() + switch { + case isWhitespace(r) || isNL(r): + return lexSkip(lx, lexArrayValue) + case r == commentStart: + lx.push(lexArrayValue) + return lexCommentStart + case r == comma: + return lx.errorf("unexpected comma") + case r == arrayEnd: + // NOTE(caleb): The spec isn't clear about whether you can have + // a trailing comma or not, so we'll allow it. + return lexArrayEnd + } + + lx.backup() + lx.push(lexArrayValueEnd) + return lexValue +} + +// lexArrayValueEnd consumes everything between the end of an array value and +// the next value (or the end of the array): it ignores whitespace and newlines +// and expects either a ',' or a ']'. +func lexArrayValueEnd(lx *lexer) stateFn { + r := lx.next() + switch { + case isWhitespace(r) || isNL(r): + return lexSkip(lx, lexArrayValueEnd) + case r == commentStart: + lx.push(lexArrayValueEnd) + return lexCommentStart + case r == comma: + lx.ignore() + return lexArrayValue // move on to the next value + case r == arrayEnd: + return lexArrayEnd + } + return lx.errorf( + "expected a comma or array terminator %q, but got %q instead", + arrayEnd, r, + ) +} + +// lexArrayEnd finishes the lexing of an array. +// It assumes that a ']' has just been consumed. +func lexArrayEnd(lx *lexer) stateFn { + lx.ignore() + lx.emit(itemArrayEnd) + return lx.pop() +} + +// lexInlineTableValue consumes one key/value pair in an inline table. +// It assumes that '{' or ',' have already been consumed. Whitespace is ignored. 
+func lexInlineTableValue(lx *lexer) stateFn { + r := lx.next() + switch { + case isWhitespace(r): + return lexSkip(lx, lexInlineTableValue) + case isNL(r): + return lx.errorf("newlines not allowed within inline tables") + case r == commentStart: + lx.push(lexInlineTableValue) + return lexCommentStart + case r == comma: + return lx.errorf("unexpected comma") + case r == inlineTableEnd: + return lexInlineTableEnd + } + lx.backup() + lx.push(lexInlineTableValueEnd) + return lexKeyStart +} + +// lexInlineTableValueEnd consumes everything between the end of an inline table +// key/value pair and the next pair (or the end of the table): +// it ignores whitespace and expects either a ',' or a '}'. +func lexInlineTableValueEnd(lx *lexer) stateFn { + r := lx.next() + switch { + case isWhitespace(r): + return lexSkip(lx, lexInlineTableValueEnd) + case isNL(r): + return lx.errorf("newlines not allowed within inline tables") + case r == commentStart: + lx.push(lexInlineTableValueEnd) + return lexCommentStart + case r == comma: + lx.ignore() + return lexInlineTableValue + case r == inlineTableEnd: + return lexInlineTableEnd + } + return lx.errorf("expected a comma or an inline table terminator %q, "+ + "but got %q instead", inlineTableEnd, r) +} + +// lexInlineTableEnd finishes the lexing of an inline table. +// It assumes that a '}' has just been consumed. +func lexInlineTableEnd(lx *lexer) stateFn { + lx.ignore() + lx.emit(itemInlineTableEnd) + return lx.pop() +} + +// lexString consumes the inner contents of a string. It assumes that the +// beginning '"' has already been consumed and ignored. 
+func lexString(lx *lexer) stateFn { + r := lx.next() + switch { + case r == eof: + return lx.errorf("unexpected EOF") + case isNL(r): + return lx.errorf("strings cannot contain newlines") + case r == '\\': + lx.push(lexString) + return lexStringEscape + case r == stringEnd: + lx.backup() + lx.emit(itemString) + lx.next() + lx.ignore() + return lx.pop() + } + return lexString +} + +// lexMultilineString consumes the inner contents of a string. It assumes that +// the beginning '"""' has already been consumed and ignored. +func lexMultilineString(lx *lexer) stateFn { + switch lx.next() { + case eof: + return lx.errorf("unexpected EOF") + case '\\': + return lexMultilineStringEscape + case stringEnd: + if lx.accept(stringEnd) { + if lx.accept(stringEnd) { + lx.backup() + lx.backup() + lx.backup() + lx.emit(itemMultilineString) + lx.next() + lx.next() + lx.next() + lx.ignore() + return lx.pop() + } + lx.backup() + } + } + return lexMultilineString +} + +// lexRawString consumes a raw string. Nothing can be escaped in such a string. +// It assumes that the beginning "'" has already been consumed and ignored. +func lexRawString(lx *lexer) stateFn { + r := lx.next() + switch { + case r == eof: + return lx.errorf("unexpected EOF") + case isNL(r): + return lx.errorf("strings cannot contain newlines") + case r == rawStringEnd: + lx.backup() + lx.emit(itemRawString) + lx.next() + lx.ignore() + return lx.pop() + } + return lexRawString +} + +// lexMultilineRawString consumes a raw string. Nothing can be escaped in such +// a string. It assumes that the beginning "'''" has already been consumed and +// ignored. 
+func lexMultilineRawString(lx *lexer) stateFn { + switch lx.next() { + case eof: + return lx.errorf("unexpected EOF") + case rawStringEnd: + if lx.accept(rawStringEnd) { + if lx.accept(rawStringEnd) { + lx.backup() + lx.backup() + lx.backup() + lx.emit(itemRawMultilineString) + lx.next() + lx.next() + lx.next() + lx.ignore() + return lx.pop() + } + lx.backup() + } + } + return lexMultilineRawString +} + +// lexMultilineStringEscape consumes an escaped character. It assumes that the +// preceding '\\' has already been consumed. +func lexMultilineStringEscape(lx *lexer) stateFn { + // Handle the special case first: + if isNL(lx.next()) { + return lexMultilineString + } + lx.backup() + lx.push(lexMultilineString) + return lexStringEscape(lx) +} + +func lexStringEscape(lx *lexer) stateFn { + r := lx.next() + switch r { + case 'b': + fallthrough + case 't': + fallthrough + case 'n': + fallthrough + case 'f': + fallthrough + case 'r': + fallthrough + case '"': + fallthrough + case '\\': + return lx.pop() + case 'u': + return lexShortUnicodeEscape + case 'U': + return lexLongUnicodeEscape + } + return lx.errorf("invalid escape character %q; only the following "+ + "escape characters are allowed: "+ + `\b, \t, \n, \f, \r, \", \\, \uXXXX, and \UXXXXXXXX`, r) +} + +func lexShortUnicodeEscape(lx *lexer) stateFn { + var r rune + for i := 0; i < 4; i++ { + r = lx.next() + if !isHexadecimal(r) { + return lx.errorf(`expected four hexadecimal digits after '\u', `+ + "but got %q instead", lx.current()) + } + } + return lx.pop() +} + +func lexLongUnicodeEscape(lx *lexer) stateFn { + var r rune + for i := 0; i < 8; i++ { + r = lx.next() + if !isHexadecimal(r) { + return lx.errorf(`expected eight hexadecimal digits after '\U', `+ + "but got %q instead", lx.current()) + } + } + return lx.pop() +} + +// lexNumberOrDateStart consumes either an integer, a float, or datetime. 
+func lexNumberOrDateStart(lx *lexer) stateFn { + r := lx.next() + if isDigit(r) { + return lexNumberOrDate + } + switch r { + case '_': + return lexNumber + case 'e', 'E': + return lexFloat + case '.': + return lx.errorf("floats must start with a digit, not '.'") + } + return lx.errorf("expected a digit but got %q", r) +} + +// lexNumberOrDate consumes either an integer, float or datetime. +func lexNumberOrDate(lx *lexer) stateFn { + r := lx.next() + if isDigit(r) { + return lexNumberOrDate + } + switch r { + case '-': + return lexDatetime + case '_': + return lexNumber + case '.', 'e', 'E': + return lexFloat + } + + lx.backup() + lx.emit(itemInteger) + return lx.pop() +} + +// lexDatetime consumes a Datetime, to a first approximation. +// The parser validates that it matches one of the accepted formats. +func lexDatetime(lx *lexer) stateFn { + r := lx.next() + if isDigit(r) { + return lexDatetime + } + switch r { + case '-', 'T', ':', '.', 'Z', '+': + return lexDatetime + } + + lx.backup() + lx.emit(itemDatetime) + return lx.pop() +} + +// lexNumberStart consumes either an integer or a float. It assumes that a sign +// has already been read, but that *no* digits have been consumed. +// lexNumberStart will move to the appropriate integer or float states. +func lexNumberStart(lx *lexer) stateFn { + // We MUST see a digit. Even floats have to start with a digit. + r := lx.next() + if !isDigit(r) { + if r == '.' { + return lx.errorf("floats must start with a digit, not '.'") + } + return lx.errorf("expected a digit but got %q", r) + } + return lexNumber +} + +// lexNumber consumes an integer or a float after seeing the first digit. +func lexNumber(lx *lexer) stateFn { + r := lx.next() + if isDigit(r) { + return lexNumber + } + switch r { + case '_': + return lexNumber + case '.', 'e', 'E': + return lexFloat + } + + lx.backup() + lx.emit(itemInteger) + return lx.pop() +} + +// lexFloat consumes the elements of a float. 
It allows any sequence of +// float-like characters, so floats emitted by the lexer are only a first +// approximation and must be validated by the parser. +func lexFloat(lx *lexer) stateFn { + r := lx.next() + if isDigit(r) { + return lexFloat + } + switch r { + case '_', '.', '-', '+', 'e', 'E': + return lexFloat + } + + lx.backup() + lx.emit(itemFloat) + return lx.pop() +} + +// lexBool consumes a bool string: 'true' or 'false. +func lexBool(lx *lexer) stateFn { + var rs []rune + for { + r := lx.next() + if !unicode.IsLetter(r) { + lx.backup() + break + } + rs = append(rs, r) + } + s := string(rs) + switch s { + case "true", "false": + lx.emit(itemBool) + return lx.pop() + } + return lx.errorf("expected value but found %q instead", s) +} + +// lexCommentStart begins the lexing of a comment. It will emit +// itemCommentStart and consume no characters, passing control to lexComment. +func lexCommentStart(lx *lexer) stateFn { + lx.ignore() + lx.emit(itemCommentStart) + return lexComment +} + +// lexComment lexes an entire comment. It assumes that '#' has been consumed. +// It will consume *up to* the first newline character, and pass control +// back to the last state on the stack. +func lexComment(lx *lexer) stateFn { + r := lx.peek() + if isNL(r) || r == eof { + lx.emit(itemText) + return lx.pop() + } + lx.next() + return lexComment +} + +// lexSkip ignores all slurped input and moves on to the next state. +func lexSkip(lx *lexer, nextState stateFn) stateFn { + return func(lx *lexer) stateFn { + lx.ignore() + return nextState + } +} + +// isWhitespace returns true if `r` is a whitespace character according +// to the spec. 
+func isWhitespace(r rune) bool { + return r == '\t' || r == ' ' +} + +func isNL(r rune) bool { + return r == '\n' || r == '\r' +} + +func isDigit(r rune) bool { + return r >= '0' && r <= '9' +} + +func isHexadecimal(r rune) bool { + return (r >= '0' && r <= '9') || + (r >= 'a' && r <= 'f') || + (r >= 'A' && r <= 'F') +} + +func isBareKeyChar(r rune) bool { + return (r >= 'A' && r <= 'Z') || + (r >= 'a' && r <= 'z') || + (r >= '0' && r <= '9') || + r == '_' || + r == '-' +} + +func (itype itemType) String() string { + switch itype { + case itemError: + return "Error" + case itemNIL: + return "NIL" + case itemEOF: + return "EOF" + case itemText: + return "Text" + case itemString, itemRawString, itemMultilineString, itemRawMultilineString: + return "String" + case itemBool: + return "Bool" + case itemInteger: + return "Integer" + case itemFloat: + return "Float" + case itemDatetime: + return "DateTime" + case itemTableStart: + return "TableStart" + case itemTableEnd: + return "TableEnd" + case itemKeyStart: + return "KeyStart" + case itemArray: + return "Array" + case itemArrayEnd: + return "ArrayEnd" + case itemCommentStart: + return "CommentStart" + } + panic(fmt.Sprintf("BUG: Unknown type '%d'.", int(itype))) +} + +func (item item) String() string { + return fmt.Sprintf("(%s, %s)", item.typ.String(), item.val) +} diff --git a/vendor/github.com/BurntSushi/toml/parse.go b/vendor/github.com/BurntSushi/toml/parse.go new file mode 100644 index 0000000000..50869ef926 --- /dev/null +++ b/vendor/github.com/BurntSushi/toml/parse.go @@ -0,0 +1,592 @@ +package toml + +import ( + "fmt" + "strconv" + "strings" + "time" + "unicode" + "unicode/utf8" +) + +type parser struct { + mapping map[string]interface{} + types map[string]tomlType + lx *lexer + + // A list of keys in the order that they appear in the TOML data. 
+ ordered []Key + + // the full key for the current hash in scope + context Key + + // the base key name for everything except hashes + currentKey string + + // rough approximation of line number + approxLine int + + // A map of 'key.group.names' to whether they were created implicitly. + implicits map[string]bool +} + +type parseError string + +func (pe parseError) Error() string { + return string(pe) +} + +func parse(data string) (p *parser, err error) { + defer func() { + if r := recover(); r != nil { + var ok bool + if err, ok = r.(parseError); ok { + return + } + panic(r) + } + }() + + p = &parser{ + mapping: make(map[string]interface{}), + types: make(map[string]tomlType), + lx: lex(data), + ordered: make([]Key, 0), + implicits: make(map[string]bool), + } + for { + item := p.next() + if item.typ == itemEOF { + break + } + p.topLevel(item) + } + + return p, nil +} + +func (p *parser) panicf(format string, v ...interface{}) { + msg := fmt.Sprintf("Near line %d (last key parsed '%s'): %s", + p.approxLine, p.current(), fmt.Sprintf(format, v...)) + panic(parseError(msg)) +} + +func (p *parser) next() item { + it := p.lx.nextItem() + if it.typ == itemError { + p.panicf("%s", it.val) + } + return it +} + +func (p *parser) bug(format string, v ...interface{}) { + panic(fmt.Sprintf("BUG: "+format+"\n\n", v...)) +} + +func (p *parser) expect(typ itemType) item { + it := p.next() + p.assertEqual(typ, it.typ) + return it +} + +func (p *parser) assertEqual(expected, got itemType) { + if expected != got { + p.bug("Expected '%s' but got '%s'.", expected, got) + } +} + +func (p *parser) topLevel(item item) { + switch item.typ { + case itemCommentStart: + p.approxLine = item.line + p.expect(itemText) + case itemTableStart: + kg := p.next() + p.approxLine = kg.line + + var key Key + for ; kg.typ != itemTableEnd && kg.typ != itemEOF; kg = p.next() { + key = append(key, p.keyString(kg)) + } + p.assertEqual(itemTableEnd, kg.typ) + + p.establishContext(key, false) + p.setType("", 
tomlHash) + p.ordered = append(p.ordered, key) + case itemArrayTableStart: + kg := p.next() + p.approxLine = kg.line + + var key Key + for ; kg.typ != itemArrayTableEnd && kg.typ != itemEOF; kg = p.next() { + key = append(key, p.keyString(kg)) + } + p.assertEqual(itemArrayTableEnd, kg.typ) + + p.establishContext(key, true) + p.setType("", tomlArrayHash) + p.ordered = append(p.ordered, key) + case itemKeyStart: + kname := p.next() + p.approxLine = kname.line + p.currentKey = p.keyString(kname) + + val, typ := p.value(p.next()) + p.setValue(p.currentKey, val) + p.setType(p.currentKey, typ) + p.ordered = append(p.ordered, p.context.add(p.currentKey)) + p.currentKey = "" + default: + p.bug("Unexpected type at top level: %s", item.typ) + } +} + +// Gets a string for a key (or part of a key in a table name). +func (p *parser) keyString(it item) string { + switch it.typ { + case itemText: + return it.val + case itemString, itemMultilineString, + itemRawString, itemRawMultilineString: + s, _ := p.value(it) + return s.(string) + default: + p.bug("Unexpected key type: %s", it.typ) + panic("unreachable") + } +} + +// value translates an expected value from the lexer into a Go value wrapped +// as an empty interface. 
+func (p *parser) value(it item) (interface{}, tomlType) { + switch it.typ { + case itemString: + return p.replaceEscapes(it.val), p.typeOfPrimitive(it) + case itemMultilineString: + trimmed := stripFirstNewline(stripEscapedWhitespace(it.val)) + return p.replaceEscapes(trimmed), p.typeOfPrimitive(it) + case itemRawString: + return it.val, p.typeOfPrimitive(it) + case itemRawMultilineString: + return stripFirstNewline(it.val), p.typeOfPrimitive(it) + case itemBool: + switch it.val { + case "true": + return true, p.typeOfPrimitive(it) + case "false": + return false, p.typeOfPrimitive(it) + } + p.bug("Expected boolean value, but got '%s'.", it.val) + case itemInteger: + if !numUnderscoresOK(it.val) { + p.panicf("Invalid integer %q: underscores must be surrounded by digits", + it.val) + } + val := strings.Replace(it.val, "_", "", -1) + num, err := strconv.ParseInt(val, 10, 64) + if err != nil { + // Distinguish integer values. Normally, it'd be a bug if the lexer + // provides an invalid integer, but it's possible that the number is + // out of range of valid values (which the lexer cannot determine). + // So mark the former as a bug but the latter as a legitimate user + // error. + if e, ok := err.(*strconv.NumError); ok && + e.Err == strconv.ErrRange { + + p.panicf("Integer '%s' is out of the range of 64-bit "+ + "signed integers.", it.val) + } else { + p.bug("Expected integer value, but got '%s'.", it.val) + } + } + return num, p.typeOfPrimitive(it) + case itemFloat: + parts := strings.FieldsFunc(it.val, func(r rune) bool { + switch r { + case '.', 'e', 'E': + return true + } + return false + }) + for _, part := range parts { + if !numUnderscoresOK(part) { + p.panicf("Invalid float %q: underscores must be "+ + "surrounded by digits", it.val) + } + } + if !numPeriodsOK(it.val) { + // As a special case, numbers like '123.' 
or '1.e2', + // which are valid as far as Go/strconv are concerned, + // must be rejected because TOML says that a fractional + // part consists of '.' followed by 1+ digits. + p.panicf("Invalid float %q: '.' must be followed "+ + "by one or more digits", it.val) + } + val := strings.Replace(it.val, "_", "", -1) + num, err := strconv.ParseFloat(val, 64) + if err != nil { + if e, ok := err.(*strconv.NumError); ok && + e.Err == strconv.ErrRange { + + p.panicf("Float '%s' is out of the range of 64-bit "+ + "IEEE-754 floating-point numbers.", it.val) + } else { + p.panicf("Invalid float value: %q", it.val) + } + } + return num, p.typeOfPrimitive(it) + case itemDatetime: + var t time.Time + var ok bool + var err error + for _, format := range []string{ + "2006-01-02T15:04:05Z07:00", + "2006-01-02T15:04:05", + "2006-01-02", + } { + t, err = time.ParseInLocation(format, it.val, time.Local) + if err == nil { + ok = true + break + } + } + if !ok { + p.panicf("Invalid TOML Datetime: %q.", it.val) + } + return t, p.typeOfPrimitive(it) + case itemArray: + array := make([]interface{}, 0) + types := make([]tomlType, 0) + + for it = p.next(); it.typ != itemArrayEnd; it = p.next() { + if it.typ == itemCommentStart { + p.expect(itemText) + continue + } + + val, typ := p.value(it) + array = append(array, val) + types = append(types, typ) + } + return array, p.typeOfArray(types) + case itemInlineTableStart: + var ( + hash = make(map[string]interface{}) + outerContext = p.context + outerKey = p.currentKey + ) + + p.context = append(p.context, p.currentKey) + p.currentKey = "" + for it := p.next(); it.typ != itemInlineTableEnd; it = p.next() { + if it.typ != itemKeyStart { + p.bug("Expected key start but instead found %q, around line %d", + it.val, p.approxLine) + } + if it.typ == itemCommentStart { + p.expect(itemText) + continue + } + + // retrieve key + k := p.next() + p.approxLine = k.line + kname := p.keyString(k) + + // retrieve value + p.currentKey = kname + val, typ := 
p.value(p.next()) + // make sure we keep metadata up to date + p.setType(kname, typ) + p.ordered = append(p.ordered, p.context.add(p.currentKey)) + hash[kname] = val + } + p.context = outerContext + p.currentKey = outerKey + return hash, tomlHash + } + p.bug("Unexpected value type: %s", it.typ) + panic("unreachable") +} + +// numUnderscoresOK checks whether each underscore in s is surrounded by +// characters that are not underscores. +func numUnderscoresOK(s string) bool { + accept := false + for _, r := range s { + if r == '_' { + if !accept { + return false + } + accept = false + continue + } + accept = true + } + return accept +} + +// numPeriodsOK checks whether every period in s is followed by a digit. +func numPeriodsOK(s string) bool { + period := false + for _, r := range s { + if period && !isDigit(r) { + return false + } + period = r == '.' + } + return !period +} + +// establishContext sets the current context of the parser, +// where the context is either a hash or an array of hashes. Which one is +// set depends on the value of the `array` parameter. +// +// Establishing the context also makes sure that the key isn't a duplicate, and +// will create implicit hashes automatically. +func (p *parser) establishContext(key Key, array bool) { + var ok bool + + // Always start at the top level and drill down for our context. + hashContext := p.mapping + keyContext := make(Key, 0) + + // We only need implicit hashes for key[0:-1] + for _, k := range key[0 : len(key)-1] { + _, ok = hashContext[k] + keyContext = append(keyContext, k) + + // No key? Make an implicit hash and move on. + if !ok { + p.addImplicit(keyContext) + hashContext[k] = make(map[string]interface{}) + } + + // If the hash context is actually an array of tables, then set + // the hash context to the last element in that array. + // + // Otherwise, it better be a table, since this MUST be a key group (by + // virtue of it not being the last element in a key). 
+ switch t := hashContext[k].(type) { + case []map[string]interface{}: + hashContext = t[len(t)-1] + case map[string]interface{}: + hashContext = t + default: + p.panicf("Key '%s' was already created as a hash.", keyContext) + } + } + + p.context = keyContext + if array { + // If this is the first element for this array, then allocate a new + // list of tables for it. + k := key[len(key)-1] + if _, ok := hashContext[k]; !ok { + hashContext[k] = make([]map[string]interface{}, 0, 5) + } + + // Add a new table. But make sure the key hasn't already been used + // for something else. + if hash, ok := hashContext[k].([]map[string]interface{}); ok { + hashContext[k] = append(hash, make(map[string]interface{})) + } else { + p.panicf("Key '%s' was already created and cannot be used as "+ + "an array.", keyContext) + } + } else { + p.setValue(key[len(key)-1], make(map[string]interface{})) + } + p.context = append(p.context, key[len(key)-1]) +} + +// setValue sets the given key to the given value in the current context. +// It will make sure that the key hasn't already been defined, account for +// implicit key groups. +func (p *parser) setValue(key string, value interface{}) { + var tmpHash interface{} + var ok bool + + hash := p.mapping + keyContext := make(Key, 0) + for _, k := range p.context { + keyContext = append(keyContext, k) + if tmpHash, ok = hash[k]; !ok { + p.bug("Context for key '%s' has not been established.", keyContext) + } + switch t := tmpHash.(type) { + case []map[string]interface{}: + // The context is a table of hashes. Pick the most recent table + // defined as the current hash. 
+ hash = t[len(t)-1] + case map[string]interface{}: + hash = t + default: + p.bug("Expected hash to have type 'map[string]interface{}', but "+ + "it has '%T' instead.", tmpHash) + } + } + keyContext = append(keyContext, key) + + if _, ok := hash[key]; ok { + // Typically, if the given key has already been set, then we have + // to raise an error since duplicate keys are disallowed. However, + // it's possible that a key was previously defined implicitly. In this + // case, it is allowed to be redefined concretely. (See the + // `tests/valid/implicit-and-explicit-after.toml` test in `toml-test`.) + // + // But we have to make sure to stop marking it as an implicit. (So that + // another redefinition provokes an error.) + // + // Note that since it has already been defined (as a hash), we don't + // want to overwrite it. So our business is done. + if p.isImplicit(keyContext) { + p.removeImplicit(keyContext) + return + } + + // Otherwise, we have a concrete key trying to override a previous + // key, which is *always* wrong. + p.panicf("Key '%s' has already been defined.", keyContext) + } + hash[key] = value +} + +// setType sets the type of a particular value at a given key. +// It should be called immediately AFTER setValue. +// +// Note that if `key` is empty, then the type given will be applied to the +// current context (which is either a table or an array of tables). +func (p *parser) setType(key string, typ tomlType) { + keyContext := make(Key, 0, len(p.context)+1) + for _, k := range p.context { + keyContext = append(keyContext, k) + } + if len(key) > 0 { // allow type setting for hashes + keyContext = append(keyContext, key) + } + p.types[keyContext.String()] = typ +} + +// addImplicit sets the given Key as having been created implicitly. +func (p *parser) addImplicit(key Key) { + p.implicits[key.String()] = true +} + +// removeImplicit stops tagging the given key as having been implicitly +// created. 
+func (p *parser) removeImplicit(key Key) { + p.implicits[key.String()] = false +} + +// isImplicit returns true if the key group pointed to by the key was created +// implicitly. +func (p *parser) isImplicit(key Key) bool { + return p.implicits[key.String()] +} + +// current returns the full key name of the current context. +func (p *parser) current() string { + if len(p.currentKey) == 0 { + return p.context.String() + } + if len(p.context) == 0 { + return p.currentKey + } + return fmt.Sprintf("%s.%s", p.context, p.currentKey) +} + +func stripFirstNewline(s string) string { + if len(s) == 0 || s[0] != '\n' { + return s + } + return s[1:] +} + +func stripEscapedWhitespace(s string) string { + esc := strings.Split(s, "\\\n") + if len(esc) > 1 { + for i := 1; i < len(esc); i++ { + esc[i] = strings.TrimLeftFunc(esc[i], unicode.IsSpace) + } + } + return strings.Join(esc, "") +} + +func (p *parser) replaceEscapes(str string) string { + var replaced []rune + s := []byte(str) + r := 0 + for r < len(s) { + if s[r] != '\\' { + c, size := utf8.DecodeRune(s[r:]) + r += size + replaced = append(replaced, c) + continue + } + r += 1 + if r >= len(s) { + p.bug("Escape sequence at end of string.") + return "" + } + switch s[r] { + default: + p.bug("Expected valid escape code after \\, but got %q.", s[r]) + return "" + case 'b': + replaced = append(replaced, rune(0x0008)) + r += 1 + case 't': + replaced = append(replaced, rune(0x0009)) + r += 1 + case 'n': + replaced = append(replaced, rune(0x000A)) + r += 1 + case 'f': + replaced = append(replaced, rune(0x000C)) + r += 1 + case 'r': + replaced = append(replaced, rune(0x000D)) + r += 1 + case '"': + replaced = append(replaced, rune(0x0022)) + r += 1 + case '\\': + replaced = append(replaced, rune(0x005C)) + r += 1 + case 'u': + // At this point, we know we have a Unicode escape of the form + // `uXXXX` at [r, r+5). (Because the lexer guarantees this + // for us.) 
+ escaped := p.asciiEscapeToUnicode(s[r+1 : r+5]) + replaced = append(replaced, escaped) + r += 5 + case 'U': + // At this point, we know we have a Unicode escape of the form + // `uXXXX` at [r, r+9). (Because the lexer guarantees this + // for us.) + escaped := p.asciiEscapeToUnicode(s[r+1 : r+9]) + replaced = append(replaced, escaped) + r += 9 + } + } + return string(replaced) +} + +func (p *parser) asciiEscapeToUnicode(bs []byte) rune { + s := string(bs) + hex, err := strconv.ParseUint(strings.ToLower(s), 16, 32) + if err != nil { + p.bug("Could not parse '%s' as a hexadecimal number, but the "+ + "lexer claims it's OK: %s", s, err) + } + if !utf8.ValidRune(rune(hex)) { + p.panicf("Escaped character '\\u%s' is not valid UTF-8.", s) + } + return rune(hex) +} + +func isStringType(ty itemType) bool { + return ty == itemString || ty == itemMultilineString || + ty == itemRawString || ty == itemRawMultilineString +} diff --git a/vendor/github.com/BurntSushi/toml/type_check.go b/vendor/github.com/BurntSushi/toml/type_check.go new file mode 100644 index 0000000000..c73f8afc1a --- /dev/null +++ b/vendor/github.com/BurntSushi/toml/type_check.go @@ -0,0 +1,91 @@ +package toml + +// tomlType represents any Go type that corresponds to a TOML type. +// While the first draft of the TOML spec has a simplistic type system that +// probably doesn't need this level of sophistication, we seem to be militating +// toward adding real composite types. +type tomlType interface { + typeString() string +} + +// typeEqual accepts any two types and returns true if they are equal. 
+func typeEqual(t1, t2 tomlType) bool { + if t1 == nil || t2 == nil { + return false + } + return t1.typeString() == t2.typeString() +} + +func typeIsHash(t tomlType) bool { + return typeEqual(t, tomlHash) || typeEqual(t, tomlArrayHash) +} + +type tomlBaseType string + +func (btype tomlBaseType) typeString() string { + return string(btype) +} + +func (btype tomlBaseType) String() string { + return btype.typeString() +} + +var ( + tomlInteger tomlBaseType = "Integer" + tomlFloat tomlBaseType = "Float" + tomlDatetime tomlBaseType = "Datetime" + tomlString tomlBaseType = "String" + tomlBool tomlBaseType = "Bool" + tomlArray tomlBaseType = "Array" + tomlHash tomlBaseType = "Hash" + tomlArrayHash tomlBaseType = "ArrayHash" +) + +// typeOfPrimitive returns a tomlType of any primitive value in TOML. +// Primitive values are: Integer, Float, Datetime, String and Bool. +// +// Passing a lexer item other than the following will cause a BUG message +// to occur: itemString, itemBool, itemInteger, itemFloat, itemDatetime. +func (p *parser) typeOfPrimitive(lexItem item) tomlType { + switch lexItem.typ { + case itemInteger: + return tomlInteger + case itemFloat: + return tomlFloat + case itemDatetime: + return tomlDatetime + case itemString: + return tomlString + case itemMultilineString: + return tomlString + case itemRawString: + return tomlString + case itemRawMultilineString: + return tomlString + case itemBool: + return tomlBool + } + p.bug("Cannot infer primitive type of lex item '%s'.", lexItem) + panic("unreachable") +} + +// typeOfArray returns a tomlType for an array given a list of types of its +// values. +// +// In the current spec, if an array is homogeneous, then its type is always +// "Array". If the array is not homogeneous, an error is generated. +func (p *parser) typeOfArray(types []tomlType) tomlType { + // Empty arrays are cool. 
+ if len(types) == 0 { + return tomlArray + } + + theType := types[0] + for _, t := range types[1:] { + if !typeEqual(theType, t) { + p.panicf("Array contains values of type '%s' and '%s', but "+ + "arrays must be homogeneous.", theType, t) + } + } + return tomlArray +} diff --git a/vendor/github.com/BurntSushi/toml/type_fields.go b/vendor/github.com/BurntSushi/toml/type_fields.go new file mode 100644 index 0000000000..608997c22f --- /dev/null +++ b/vendor/github.com/BurntSushi/toml/type_fields.go @@ -0,0 +1,242 @@ +package toml + +// Struct field handling is adapted from code in encoding/json: +// +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the Go distribution. + +import ( + "reflect" + "sort" + "sync" +) + +// A field represents a single field found in a struct. +type field struct { + name string // the name of the field (`toml` tag included) + tag bool // whether field has a `toml` tag + index []int // represents the depth of an anonymous field + typ reflect.Type // the type of the field +} + +// byName sorts field by name, breaking ties with depth, +// then breaking ties with "name came from toml tag", then +// breaking ties with index sequence. +type byName []field + +func (x byName) Len() int { return len(x) } + +func (x byName) Swap(i, j int) { x[i], x[j] = x[j], x[i] } + +func (x byName) Less(i, j int) bool { + if x[i].name != x[j].name { + return x[i].name < x[j].name + } + if len(x[i].index) != len(x[j].index) { + return len(x[i].index) < len(x[j].index) + } + if x[i].tag != x[j].tag { + return x[i].tag + } + return byIndex(x).Less(i, j) +} + +// byIndex sorts field by index sequence. 
type byIndex []field

func (x byIndex) Len() int { return len(x) }

func (x byIndex) Swap(i, j int) { x[i], x[j] = x[j], x[i] }

// Less compares the index sequences element by element. When one sequence
// is a prefix of the other, the shorter one sorts first.
func (x byIndex) Less(i, j int) bool {
	for k, xik := range x[i].index {
		if k >= len(x[j].index) {
			// x[j] is a strict prefix of x[i].
			return false
		}
		if xik != x[j].index[k] {
			return xik < x[j].index[k]
		}
	}
	return len(x[i].index) < len(x[j].index)
}

// typeFields returns a list of fields that TOML should recognize for the given
// type. The algorithm is breadth-first search over the set of structs to
// include - the top struct and then any reachable anonymous structs.
func typeFields(t reflect.Type) []field {
	// Anonymous fields to explore at the current level and the next.
	current := []field{}
	next := []field{{typ: t}}

	// Count of queued names for current level and the next.
	count := map[reflect.Type]int{}
	nextCount := map[reflect.Type]int{}

	// Types already visited at an earlier level.
	visited := map[reflect.Type]bool{}

	// Fields found.
	var fields []field

	for len(next) > 0 {
		// Advance one level: the queued structs become the current level and
		// the old slice's storage is reused for the next queue.
		current, next = next, current[:0]
		count, nextCount = nextCount, map[reflect.Type]int{}

		for _, f := range current {
			if visited[f.typ] {
				continue
			}
			visited[f.typ] = true

			// Scan f.typ for fields to include.
			for i := 0; i < f.typ.NumField(); i++ {
				sf := f.typ.Field(i)
				if sf.PkgPath != "" && !sf.Anonymous { // unexported
					continue
				}
				opts := getOptions(sf.Tag)
				if opts.skip {
					continue
				}
				// The index path of this field is the parent's path plus i.
				index := make([]int, len(f.index)+1)
				copy(index, f.index)
				index[len(f.index)] = i

				ft := sf.Type
				if ft.Name() == "" && ft.Kind() == reflect.Ptr {
					// Follow pointer.
					ft = ft.Elem()
				}

				// Record found field and index sequence.
				if opts.name != "" || !sf.Anonymous || ft.Kind() != reflect.Struct {
					tagged := opts.name != ""
					name := opts.name
					if name == "" {
						name = sf.Name
					}
					fields = append(fields, field{name, tagged, index, ft})
					if count[f.typ] > 1 {
						// If there were multiple instances, add a second,
						// so that the annihilation code will see a duplicate.
						// It only cares about the distinction between 1 or 2,
						// so don't bother generating any more copies.
						fields = append(fields, fields[len(fields)-1])
					}
					continue
				}

				// Record new anonymous struct to explore in next round.
				nextCount[ft]++
				if nextCount[ft] == 1 {
					f := field{name: ft.Name(), index: index, typ: ft}
					next = append(next, f)
				}
			}
		}
	}

	sort.Sort(byName(fields))

	// Delete all fields that are hidden by the Go rules for embedded fields,
	// except that fields with TOML tags are promoted.

	// The fields are sorted in primary order of name, secondary order
	// of field index length. Loop over names; for each name, delete
	// hidden fields by choosing the one dominant field that survives.
	out := fields[:0]
	for advance, i := 0, 0; i < len(fields); i += advance {
		// One iteration per name.
		// Find the sequence of fields with the name of this first field.
		fi := fields[i]
		name := fi.name
		for advance = 1; i+advance < len(fields); advance++ {
			fj := fields[i+advance]
			if fj.name != name {
				break
			}
		}
		if advance == 1 { // Only one field with this name
			out = append(out, fi)
			continue
		}
		dominant, ok := dominantField(fields[i : i+advance])
		if ok {
			out = append(out, dominant)
		}
	}

	// Return the surviving fields in index (declaration) order.
	fields = out
	sort.Sort(byIndex(fields))

	return fields
}

// dominantField looks through the fields, all of which are known to
// have the same name, to find the single field that dominates the
// others using Go's embedding rules, modified by the presence of
// TOML tags. If there are multiple top-level fields, the boolean
// will be false: This condition is an error in Go and we skip all
// the fields.
func dominantField(fields []field) (field, bool) {
	// The fields are sorted in increasing index-length order. The winner
	// must therefore be one with the shortest index length. Drop all
	// longer entries, which is easy: just truncate the slice.
	length := len(fields[0].index)
	tagged := -1 // Index of first tagged field.
	for i, f := range fields {
		if len(f.index) > length {
			fields = fields[:i]
			break
		}
		if f.tag {
			if tagged >= 0 {
				// Multiple tagged fields at the same level: conflict.
				// Return no field.
				return field{}, false
			}
			tagged = i
		}
	}
	// A single tagged field wins over untagged fields at the same depth.
	if tagged >= 0 {
		return fields[tagged], true
	}
	// All remaining fields have the same length. If there's more than one,
	// we have a conflict (two fields named "X" at the same level) and we
	// return no field.
	if len(fields) > 1 {
		return field{}, false
	}
	return fields[0], true
}

// fieldCache memoizes the result of typeFields per type. The embedded
// RWMutex guards m, which is allocated on first store.
var fieldCache struct {
	sync.RWMutex
	m map[reflect.Type][]field
}

// cachedTypeFields is like typeFields but uses a cache to avoid repeated work.
func cachedTypeFields(t reflect.Type) []field {
	fieldCache.RLock()
	f := fieldCache.m[t]
	fieldCache.RUnlock()
	if f != nil {
		return f
	}

	// Compute fields without lock.
	// Might duplicate effort but won't hold other computations back.
	f = typeFields(t)
	if f == nil {
		// Store a non-nil empty slice so that the lookup above can tell a
		// cached empty result from a cache miss.
		f = []field{}
	}

	fieldCache.Lock()
	if fieldCache.m == nil {
		fieldCache.m = map[reflect.Type][]field{}
	}
	fieldCache.m[t] = f
	fieldCache.Unlock()
	return f
}
diff --git a/vendor/github.com/couchbase/gomemcached/LICENSE b/vendor/github.com/couchbase/gomemcached/LICENSE
new file mode 100644
index 0000000000..b01ef80261
--- /dev/null
+++ b/vendor/github.com/couchbase/gomemcached/LICENSE
@@ -0,0 +1,19 @@
Copyright (c) 2013 Dustin Sallings

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
diff --git a/vendor/github.com/couchbase/gomemcached/client/mc.go b/vendor/github.com/couchbase/gomemcached/client/mc.go
new file mode 100644
index 0000000000..bd1433ba28
--- /dev/null
+++ b/vendor/github.com/couchbase/gomemcached/client/mc.go
@@ -0,0 +1,1074 @@
// Package memcached provides a memcached binary protocol client.
+package memcached + +import ( + "encoding/binary" + "fmt" + "github.com/couchbase/gomemcached" + "github.com/couchbase/goutils/logging" + "github.com/couchbase/goutils/scramsha" + "github.com/pkg/errors" + "io" + "math" + "net" + "strings" + "sync" + "sync/atomic" + "time" +) + +type ClientIface interface { + Add(vb uint16, key string, flags int, exp int, body []byte) (*gomemcached.MCResponse, error) + Append(vb uint16, key string, data []byte) (*gomemcached.MCResponse, error) + Auth(user, pass string) (*gomemcached.MCResponse, error) + AuthList() (*gomemcached.MCResponse, error) + AuthPlain(user, pass string) (*gomemcached.MCResponse, error) + AuthScramSha(user, pass string) (*gomemcached.MCResponse, error) + CASNext(vb uint16, k string, exp int, state *CASState) bool + CAS(vb uint16, k string, f CasFunc, initexp int) (*gomemcached.MCResponse, error) + Close() error + Decr(vb uint16, key string, amt, def uint64, exp int) (uint64, error) + Del(vb uint16, key string) (*gomemcached.MCResponse, error) + EnableMutationToken() (*gomemcached.MCResponse, error) + Get(vb uint16, key string) (*gomemcached.MCResponse, error) + GetSubdoc(vb uint16, key string, subPaths []string) (*gomemcached.MCResponse, error) + GetAndTouch(vb uint16, key string, exp int) (*gomemcached.MCResponse, error) + GetBulk(vb uint16, keys []string, rv map[string]*gomemcached.MCResponse, subPaths []string) error + GetMeta(vb uint16, key string) (*gomemcached.MCResponse, error) + GetRandomDoc() (*gomemcached.MCResponse, error) + Hijack() io.ReadWriteCloser + Incr(vb uint16, key string, amt, def uint64, exp int) (uint64, error) + Observe(vb uint16, key string) (result ObserveResult, err error) + ObserveSeq(vb uint16, vbuuid uint64) (result *ObserveSeqResult, err error) + Receive() (*gomemcached.MCResponse, error) + ReceiveWithDeadline(deadline time.Time) (*gomemcached.MCResponse, error) + Send(req *gomemcached.MCRequest) (rv *gomemcached.MCResponse, err error) + Set(vb uint16, key string, flags int, 
exp int, body []byte) (*gomemcached.MCResponse, error) + SetKeepAliveOptions(interval time.Duration) + SetReadDeadline(t time.Time) + SetDeadline(t time.Time) + SelectBucket(bucket string) (*gomemcached.MCResponse, error) + SetCas(vb uint16, key string, flags int, exp int, cas uint64, body []byte) (*gomemcached.MCResponse, error) + Stats(key string) ([]StatValue, error) + StatsMap(key string) (map[string]string, error) + StatsMapForSpecifiedStats(key string, statsMap map[string]string) error + Transmit(req *gomemcached.MCRequest) error + TransmitWithDeadline(req *gomemcached.MCRequest, deadline time.Time) error + TransmitResponse(res *gomemcached.MCResponse) error + + // UprFeed Related + NewUprFeed() (*UprFeed, error) + NewUprFeedIface() (UprFeedIface, error) + NewUprFeedWithConfig(ackByClient bool) (*UprFeed, error) + NewUprFeedWithConfigIface(ackByClient bool) (UprFeedIface, error) + UprGetFailoverLog(vb []uint16) (map[uint16]*FailoverLog, error) +} + +const bufsize = 1024 + +var UnHealthy uint32 = 0 +var Healthy uint32 = 1 + +type Features []Feature +type Feature uint16 + +const FeatureMutationToken = Feature(0x04) +const FeatureXattr = Feature(0x06) +const FeatureDataType = Feature(0x0b) + +// The Client itself. +type Client struct { + conn io.ReadWriteCloser + // use uint32 type so that it can be accessed through atomic APIs + healthy uint32 + opaque uint32 + + hdrBuf []byte +} + +var ( + DefaultDialTimeout = time.Duration(0) // No timeout + + DefaultWriteTimeout = time.Duration(0) // No timeout + + dialFun = func(prot, dest string) (net.Conn, error) { + return net.DialTimeout(prot, dest, DefaultDialTimeout) + } +) + +// Connect to a memcached server. 
+func Connect(prot, dest string) (rv *Client, err error) { + conn, err := dialFun(prot, dest) + if err != nil { + return nil, err + } + return Wrap(conn) +} + +func SetDefaultTimeouts(dial, read, write time.Duration) { + DefaultDialTimeout = dial + DefaultWriteTimeout = write +} + +func SetDefaultDialTimeout(dial time.Duration) { + DefaultDialTimeout = dial +} + +func (c *Client) SetKeepAliveOptions(interval time.Duration) { + c.conn.(*net.TCPConn).SetKeepAlive(true) + c.conn.(*net.TCPConn).SetKeepAlivePeriod(interval) +} + +func (c *Client) SetReadDeadline(t time.Time) { + c.conn.(*net.TCPConn).SetReadDeadline(t) +} + +func (c *Client) SetDeadline(t time.Time) { + c.conn.(*net.TCPConn).SetDeadline(t) +} + +// Wrap an existing transport. +func Wrap(rwc io.ReadWriteCloser) (rv *Client, err error) { + client := &Client{ + conn: rwc, + hdrBuf: make([]byte, gomemcached.HDR_LEN), + opaque: uint32(1), + } + client.setHealthy(true) + return client, nil +} + +// Close the connection when you're done. +func (c *Client) Close() error { + return c.conn.Close() +} + +// IsHealthy returns true unless the client is belived to have +// difficulty communicating to its server. +// +// This is useful for connection pools where we want to +// non-destructively determine that a connection may be reused. +func (c Client) IsHealthy() bool { + healthyState := atomic.LoadUint32(&c.healthy) + return healthyState == Healthy +} + +// Send a custom request and get the response. +func (c *Client) Send(req *gomemcached.MCRequest) (rv *gomemcached.MCResponse, err error) { + err = c.Transmit(req) + if err != nil { + return + } + resp, _, err := getResponse(c.conn, c.hdrBuf) + c.setHealthy(!gomemcached.IsFatal(err)) + return resp, err +} + +// Transmit send a request, but does not wait for a response. 
+func (c *Client) Transmit(req *gomemcached.MCRequest) error { + if DefaultWriteTimeout > 0 { + c.conn.(net.Conn).SetWriteDeadline(time.Now().Add(DefaultWriteTimeout)) + } + _, err := transmitRequest(c.conn, req) + // clear write deadline to avoid interference with future write operations + if DefaultWriteTimeout > 0 { + c.conn.(net.Conn).SetWriteDeadline(time.Time{}) + } + if err != nil { + c.setHealthy(false) + } + return err +} + +func (c *Client) TransmitWithDeadline(req *gomemcached.MCRequest, deadline time.Time) error { + c.conn.(net.Conn).SetWriteDeadline(deadline) + + _, err := transmitRequest(c.conn, req) + + // clear write deadline to avoid interference with future write operations + c.conn.(net.Conn).SetWriteDeadline(time.Time{}) + + if err != nil { + c.setHealthy(false) + } + return err +} + +// TransmitResponse send a response, does not wait. +func (c *Client) TransmitResponse(res *gomemcached.MCResponse) error { + if DefaultWriteTimeout > 0 { + c.conn.(net.Conn).SetWriteDeadline(time.Now().Add(DefaultWriteTimeout)) + } + _, err := transmitResponse(c.conn, res) + // clear write deadline to avoid interference with future write operations + if DefaultWriteTimeout > 0 { + c.conn.(net.Conn).SetWriteDeadline(time.Time{}) + } + if err != nil { + c.setHealthy(false) + } + return err +} + +// Receive a response +func (c *Client) Receive() (*gomemcached.MCResponse, error) { + resp, _, err := getResponse(c.conn, c.hdrBuf) + if err != nil && resp.Status != gomemcached.KEY_ENOENT && resp.Status != gomemcached.EBUSY { + c.setHealthy(false) + } + return resp, err +} + +func (c *Client) ReceiveWithDeadline(deadline time.Time) (*gomemcached.MCResponse, error) { + c.conn.(net.Conn).SetReadDeadline(deadline) + + resp, _, err := getResponse(c.conn, c.hdrBuf) + + // Clear read deadline to avoid interference with future read operations. 
+ c.conn.(net.Conn).SetReadDeadline(time.Time{}) + + if err != nil && resp.Status != gomemcached.KEY_ENOENT && resp.Status != gomemcached.EBUSY { + c.setHealthy(false) + } + return resp, err +} + +func appendMutationToken(bytes []byte) []byte { + bytes = append(bytes, 0, 0) + binary.BigEndian.PutUint16(bytes[len(bytes)-2:], uint16(0x04)) + return bytes +} + +//Send a hello command to enable MutationTokens +func (c *Client) EnableMutationToken() (*gomemcached.MCResponse, error) { + var payload []byte + payload = appendMutationToken(payload) + + return c.Send(&gomemcached.MCRequest{ + Opcode: gomemcached.HELLO, + Key: []byte("GoMemcached"), + Body: payload, + }) + +} + +//Send a hello command to enable specific features +func (c *Client) EnableFeatures(features Features) (*gomemcached.MCResponse, error) { + var payload []byte + + for _, feature := range features { + payload = append(payload, 0, 0) + binary.BigEndian.PutUint16(payload[len(payload)-2:], uint16(feature)) + } + + return c.Send(&gomemcached.MCRequest{ + Opcode: gomemcached.HELLO, + Key: []byte("GoMemcached"), + Body: payload, + }) + +} + +// Get the value for a key. 
+func (c *Client) Get(vb uint16, key string) (*gomemcached.MCResponse, error) { + return c.Send(&gomemcached.MCRequest{ + Opcode: gomemcached.GET, + VBucket: vb, + Key: []byte(key), + }) +} + +// Get the xattrs, doc value for the input key +func (c *Client) GetSubdoc(vb uint16, key string, subPaths []string) (*gomemcached.MCResponse, error) { + + extraBuf, valueBuf := GetSubDocVal(subPaths) + res, err := c.Send(&gomemcached.MCRequest{ + Opcode: gomemcached.SUBDOC_MULTI_LOOKUP, + VBucket: vb, + Key: []byte(key), + Extras: extraBuf, + Body: valueBuf, + }) + + if err != nil && IfResStatusError(res) { + return res, err + } + return res, nil +} + +// Get the value for a key, and update expiry +func (c *Client) GetAndTouch(vb uint16, key string, exp int) (*gomemcached.MCResponse, error) { + extraBuf := make([]byte, 4) + binary.BigEndian.PutUint32(extraBuf[0:], uint32(exp)) + return c.Send(&gomemcached.MCRequest{ + Opcode: gomemcached.GAT, + VBucket: vb, + Key: []byte(key), + Extras: extraBuf, + }) +} + +// Get metadata for a key +func (c *Client) GetMeta(vb uint16, key string) (*gomemcached.MCResponse, error) { + return c.Send(&gomemcached.MCRequest{ + Opcode: gomemcached.GET_META, + VBucket: vb, + Key: []byte(key), + }) +} + +// Del deletes a key. +func (c *Client) Del(vb uint16, key string) (*gomemcached.MCResponse, error) { + return c.Send(&gomemcached.MCRequest{ + Opcode: gomemcached.DELETE, + VBucket: vb, + Key: []byte(key)}) +} + +// Get a random document +func (c *Client) GetRandomDoc() (*gomemcached.MCResponse, error) { + return c.Send(&gomemcached.MCRequest{ + Opcode: 0xB6, + }) +} + +// AuthList lists SASL auth mechanisms. +func (c *Client) AuthList() (*gomemcached.MCResponse, error) { + return c.Send(&gomemcached.MCRequest{ + Opcode: gomemcached.SASL_LIST_MECHS}) +} + +// Auth performs SASL PLAIN authentication against the server. 
+func (c *Client) Auth(user, pass string) (*gomemcached.MCResponse, error) { + res, err := c.AuthList() + + if err != nil { + return res, err + } + + authMech := string(res.Body) + if strings.Index(authMech, "PLAIN") != -1 { + return c.AuthPlain(user, pass) + } + return nil, fmt.Errorf("auth mechanism PLAIN not supported") +} + +// AuthScramSha performs SCRAM-SHA authentication against the server. +func (c *Client) AuthScramSha(user, pass string) (*gomemcached.MCResponse, error) { + res, err := c.AuthList() + if err != nil { + return nil, errors.Wrap(err, "Unable to obtain list of methods.") + } + + methods := string(res.Body) + method, err := scramsha.BestMethod(methods) + if err != nil { + return nil, errors.Wrap(err, + "Unable to select SCRAM-SHA method.") + } + + s, err := scramsha.NewScramSha(method) + if err != nil { + return nil, errors.Wrap(err, "Unable to initialize scramsha.") + } + + logging.Infof("Using %v authentication for user %v%v%v", method, gomemcached.UdTagBegin, user, gomemcached.UdTagEnd) + + message, err := s.GetStartRequest(user) + if err != nil { + return nil, errors.Wrapf(err, + "Error building start request for user %s.", user) + } + + startRequest := &gomemcached.MCRequest{ + Opcode: 0x21, + Key: []byte(method), + Body: []byte(message)} + + startResponse, err := c.Send(startRequest) + if err != nil { + return nil, errors.Wrap(err, "Error sending start request.") + } + + err = s.HandleStartResponse(string(startResponse.Body)) + if err != nil { + return nil, errors.Wrap(err, "Error handling start response.") + } + + message = s.GetFinalRequest(pass) + + // send step request + finalRequest := &gomemcached.MCRequest{ + Opcode: 0x22, + Key: []byte(method), + Body: []byte(message)} + finalResponse, err := c.Send(finalRequest) + if err != nil { + return nil, errors.Wrap(err, "Error sending final request.") + } + + err = s.HandleFinalResponse(string(finalResponse.Body)) + if err != nil { + return nil, errors.Wrap(err, "Error handling final 
response.") + } + + return finalResponse, nil +} + +func (c *Client) AuthPlain(user, pass string) (*gomemcached.MCResponse, error) { + logging.Infof("Using plain authentication for user %v%v%v", gomemcached.UdTagBegin, user, gomemcached.UdTagEnd) + return c.Send(&gomemcached.MCRequest{ + Opcode: gomemcached.SASL_AUTH, + Key: []byte("PLAIN"), + Body: []byte(fmt.Sprintf("\x00%s\x00%s", user, pass))}) +} + +// select bucket +func (c *Client) SelectBucket(bucket string) (*gomemcached.MCResponse, error) { + + return c.Send(&gomemcached.MCRequest{ + Opcode: gomemcached.SELECT_BUCKET, + Key: []byte(fmt.Sprintf("%s", bucket))}) +} + +func (c *Client) store(opcode gomemcached.CommandCode, vb uint16, + key string, flags int, exp int, body []byte) (*gomemcached.MCResponse, error) { + + req := &gomemcached.MCRequest{ + Opcode: opcode, + VBucket: vb, + Key: []byte(key), + Cas: 0, + Opaque: 0, + Extras: []byte{0, 0, 0, 0, 0, 0, 0, 0}, + Body: body} + + binary.BigEndian.PutUint64(req.Extras, uint64(flags)<<32|uint64(exp)) + return c.Send(req) +} + +func (c *Client) storeCas(opcode gomemcached.CommandCode, vb uint16, + key string, flags int, exp int, cas uint64, body []byte) (*gomemcached.MCResponse, error) { + + req := &gomemcached.MCRequest{ + Opcode: opcode, + VBucket: vb, + Key: []byte(key), + Cas: cas, + Opaque: 0, + Extras: []byte{0, 0, 0, 0, 0, 0, 0, 0}, + Body: body} + + binary.BigEndian.PutUint64(req.Extras, uint64(flags)<<32|uint64(exp)) + return c.Send(req) +} + +// Incr increments the value at the given key. 
+func (c *Client) Incr(vb uint16, key string, + amt, def uint64, exp int) (uint64, error) { + + req := &gomemcached.MCRequest{ + Opcode: gomemcached.INCREMENT, + VBucket: vb, + Key: []byte(key), + Extras: make([]byte, 8+8+4), + } + binary.BigEndian.PutUint64(req.Extras[:8], amt) + binary.BigEndian.PutUint64(req.Extras[8:16], def) + binary.BigEndian.PutUint32(req.Extras[16:20], uint32(exp)) + + resp, err := c.Send(req) + if err != nil { + return 0, err + } + + return binary.BigEndian.Uint64(resp.Body), nil +} + +// Decr decrements the value at the given key. +func (c *Client) Decr(vb uint16, key string, + amt, def uint64, exp int) (uint64, error) { + + req := &gomemcached.MCRequest{ + Opcode: gomemcached.DECREMENT, + VBucket: vb, + Key: []byte(key), + Extras: make([]byte, 8+8+4), + } + binary.BigEndian.PutUint64(req.Extras[:8], amt) + binary.BigEndian.PutUint64(req.Extras[8:16], def) + binary.BigEndian.PutUint32(req.Extras[16:20], uint32(exp)) + + resp, err := c.Send(req) + if err != nil { + return 0, err + } + + return binary.BigEndian.Uint64(resp.Body), nil +} + +// Add a value for a key (store if not exists). +func (c *Client) Add(vb uint16, key string, flags int, exp int, + body []byte) (*gomemcached.MCResponse, error) { + return c.store(gomemcached.ADD, vb, key, flags, exp, body) +} + +// Set the value for a key. +func (c *Client) Set(vb uint16, key string, flags int, exp int, + body []byte) (*gomemcached.MCResponse, error) { + return c.store(gomemcached.SET, vb, key, flags, exp, body) +} + +// SetCas set the value for a key with cas +func (c *Client) SetCas(vb uint16, key string, flags int, exp int, cas uint64, + body []byte) (*gomemcached.MCResponse, error) { + return c.storeCas(gomemcached.SET, vb, key, flags, exp, cas, body) +} + +// Append data to the value of a key. 
func (c *Client) Append(vb uint16, key string, data []byte) (*gomemcached.MCResponse, error) {
	req := &gomemcached.MCRequest{
		Opcode:  gomemcached.APPEND,
		VBucket: vb,
		Key:     []byte(key),
		Cas:     0,
		Opaque:  0,
		Body:    data}

	return c.Send(req)
}

// GetBulk gets keys in bulk
//
// One request per key is transmitted (GET, or SUBDOC_MULTI_LOOKUP when
// subPaths is non-empty), followed by a NOOP. A goroutine concurrently
// receives responses and stores each GET/SUBDOC response in rv under the key
// whose request carried the matching opaque value. Responses that fail with
// KEY_ENOENT are skipped, so such keys are simply absent from rv.
//
// The return value is the first value the receiver goroutine puts on its
// error channel: the receive error, an "Out of Bounds error" for an
// unexpected opaque, or nil once the NOOP response has been seen. A transmit
// failure is returned directly.
func (c *Client) GetBulk(vb uint16, keys []string, rv map[string]*gomemcached.MCResponse, subPaths []string) error {
	stopch := make(chan bool)
	var wg sync.WaitGroup

	// On every return path: signal the receiver to stop, then wait for it.
	defer func() {
		close(stopch)
		wg.Wait()
	}()

	// Restart the opaque counter when the remaining range could not number
	// all keys plus the trailing NOOP.
	if (math.MaxInt32 - c.opaque) < (uint32(len(keys)) + 1) {
		c.opaque = uint32(1)
	}

	// Opaque of the first request; responses are mapped back to keys by
	// their offset from this value.
	opStart := c.opaque

	// Buffered for two sends: at most one error plus the nil that the
	// receiver's deferred function always sends.
	errch := make(chan error, 2)

	wg.Add(1)
	go func() {
		defer func() {
			if r := recover(); r != nil {
				logging.Infof("Recovered in f %v", r)
			}
			errch <- nil
			wg.Done()
		}()

		ok := true
		for ok {

			// stopch is only polled between receives; c.Receive itself blocks.
			select {
			case <-stopch:
				return
			default:
				res, err := c.Receive()

				if err != nil && IfResStatusError(res) {
					if res == nil || res.Status != gomemcached.KEY_ENOENT {
						errch <- err
						return
					}
					// continue receiving in case of KEY_ENOENT
				} else if res.Opcode == gomemcached.GET ||
					res.Opcode == gomemcached.SUBDOC_GET ||
					res.Opcode == gomemcached.SUBDOC_MULTI_LOOKUP {
					opaque := res.Opaque - opStart
					// NOTE(review): opaque is compared against a uint32 below, so the
					// "< 0" test looks like it can never be true — confirm.
					if opaque < 0 || opaque >= uint32(len(keys)) {
						// Every now and then we seem to be seeing an invalid opaque
						// value returned from the server. When this happens log the error
						// and the calling function will retry the bulkGet. MB-15140
						logging.Errorf(" Invalid opaque Value. Debug info : Res.opaque : %v(%v), Keys %v, Response received %v \n key list %v this key %v", res.Opaque, opaque, len(keys), res, keys, string(res.Body))
						errch <- fmt.Errorf("Out of Bounds error")
						return
					}

					rv[keys[opaque]] = res
				}

				// The NOOP response marks the end of the pipelined batch.
				if res.Opcode == gomemcached.NOOP {
					ok = false
				}
			}
		}
	}()

	// A single request packet is reused for every key; only Key and Opaque
	// change between transmissions.
	memcachedReqPkt := &gomemcached.MCRequest{
		Opcode:  gomemcached.GET,
		VBucket: vb,
	}

	if len(subPaths) > 0 {
		extraBuf, valueBuf := GetSubDocVal(subPaths)
		memcachedReqPkt.Opcode = gomemcached.SUBDOC_MULTI_LOOKUP
		memcachedReqPkt.Extras = extraBuf
		memcachedReqPkt.Body = valueBuf
	}

	for _, k := range keys { // Start of Get request
		memcachedReqPkt.Key = []byte(k)
		memcachedReqPkt.Opaque = c.opaque

		err := c.Transmit(memcachedReqPkt)
		if err != nil {
			logging.Errorf(" Transmit failed in GetBulkAll %v", err)
			return err
		}
		c.opaque++
	} // End of Get request

	// finally transmit a NOOP
	err := c.Transmit(&gomemcached.MCRequest{
		Opcode:  gomemcached.NOOP,
		VBucket: vb,
		Opaque:  c.opaque,
	})

	if err != nil {
		logging.Errorf(" Transmit of NOOP failed %v", err)
		return err
	}
	c.opaque++

	return <-errch
}

// GetSubDocVal builds the extras and body for a sub-document multi-lookup
// over subPaths.
//
// extraBuf is the single byte 0x04. valueBuf holds one entry per path: the
// SUBDOC_GET opcode byte, the SUBDOC_FLAG_XATTR flag byte, the path length as
// a 2-byte big-endian value, then the path bytes. The buffer is allocated
// with room for one additional 4-byte entry header that is left zeroed.
// NOTE(review): presumably that zeroed trailer is a deliberate final lookup
// spec — confirm before changing the size computation.
func GetSubDocVal(subPaths []string) (extraBuf, valueBuf []byte) {

	var ops []string
	totalBytesLen := 0
	num := 1

	for _, v := range subPaths {
		totalBytesLen = totalBytesLen + len([]byte(v))
		ops = append(ops, v)
		num = num + 1
	}

	// Xattr retrieval - subdoc multi get
	extraBuf = append(extraBuf, uint8(0x04))

	valueBuf = make([]byte, num*4+totalBytesLen)

	//opcode for subdoc get
	op := gomemcached.SUBDOC_GET

	// Calculate path total bytes
	// There are 2 ops - get xattrs - both input and $document and get whole doc
	valIter := 0

	for _, v := range ops {
		pathBytes := []byte(v)
		valueBuf[valIter+0] = uint8(op)

		// SubdocFlagXattrPath indicates that the path refers to
		// an Xattr rather than the document body.
		valueBuf[valIter+1] = uint8(gomemcached.SUBDOC_FLAG_XATTR)

		// 2 byte key
		binary.BigEndian.PutUint16(valueBuf[valIter+2:], uint16(len(pathBytes)))

		// Then n bytes path
		copy(valueBuf[valIter+4:], pathBytes)
		valIter = valIter + 4 + len(pathBytes)
	}

	return
}

// ObservedStatus is the type reported by the Observe method
type ObservedStatus uint8

// Observation status values.
const (
	ObservedNotPersisted     = ObservedStatus(0x00) // found, not persisted
	ObservedPersisted        = ObservedStatus(0x01) // found, persisted
	ObservedNotFound         = ObservedStatus(0x80) // not found (or a persisted delete)
	ObservedLogicallyDeleted = ObservedStatus(0x81) // pending deletion (not persisted yet)
)

// ObserveResult represents the data obtained by an Observe call
type ObserveResult struct {
	Status          ObservedStatus // Whether the value has been persisted/deleted
	Cas             uint64         // Current value's CAS
	PersistenceTime time.Duration  // Node's average time to persist a value
	ReplicationTime time.Duration  // Node's average time to replicate a value
}

// Observe gets the persistence/replication/CAS state of a key
//
// The request body is the vbucket (2 bytes), the key length (2 bytes) and the
// key. The response body is expected to echo vbucket, key length and key,
// followed by a status byte and an 8-byte CAS; a shorter body yields
// io.ErrUnexpectedEOF and a mismatching vbucket or key yields an error.
func (c *Client) Observe(vb uint16, key string) (result ObserveResult, err error) {
	// http://www.couchbase.com/wiki/display/couchbase/Observe
	body := make([]byte, 4+len(key))
	binary.BigEndian.PutUint16(body[0:2], vb)
	binary.BigEndian.PutUint16(body[2:4], uint16(len(key)))
	copy(body[4:4+len(key)], key)

	res, err := c.Send(&gomemcached.MCRequest{
		Opcode:  gomemcached.OBSERVE,
		VBucket: vb,
		Body:    body,
	})
	if err != nil {
		return
	}

	// Parse the response data from the body:
	if len(res.Body) < 2+2+1 {
		err = io.ErrUnexpectedEOF
		return
	}
	outVb := binary.BigEndian.Uint16(res.Body[0:2])
	keyLen := binary.BigEndian.Uint16(res.Body[2:4])
	// Require room for header, key, status byte and CAS before slicing.
	if len(res.Body) < 2+2+int(keyLen)+1+8 {
		err = io.ErrUnexpectedEOF
		return
	}
	outKey := string(res.Body[4 : 4+keyLen])
	if outVb != vb || outKey != key {
		err = fmt.Errorf("observe returned wrong vbucket/key: %d/%q", outVb, outKey)
		return
	}
	result.Status = ObservedStatus(res.Body[4+keyLen])
	result.Cas = binary.BigEndian.Uint64(res.Body[5+keyLen:])
	// The response reuses the Cas field to store time statistics:
	// upper 32 bits are the persistence time, lower 32 bits the replication
	// time, both interpreted as milliseconds.
	result.PersistenceTime = time.Duration(res.Cas>>32) * time.Millisecond
	result.ReplicationTime = time.Duration(res.Cas&math.MaxUint32) * time.Millisecond
	return
}

// CheckPersistence checks whether a stored value has been persisted to disk yet.
//
// cas is the CAS the caller expects the value to have; deletion says whether
// the observed operation was a delete. The cases are evaluated in order: a
// not-found status for a deletion counts as persisted; otherwise a differing
// CAS reports overwritten; otherwise a persisted status reports persisted.
func (result ObserveResult) CheckPersistence(cas uint64, deletion bool) (persisted bool, overwritten bool) {
	switch {
	case result.Status == ObservedNotFound && deletion:
		persisted = true
	case result.Cas != cas:
		overwritten = true
	case result.Status == ObservedPersisted:
		persisted = true
	}
	return
}

// ObserveSeqResult is the result of the sequence-number based Observe
// implementation (see ObserveSeq).
type ObserveSeqResult struct {
	Failover           uint8  // Set to 1 if a failover took place
	VbId               uint16 // vbucket id
	Vbuuid             uint64 // vbucket uuid
	LastPersistedSeqNo uint64 // last persisted sequence number
	CurrentSeqNo       uint64 // current sequence number
	OldVbuuid          uint64 // Old bucket vbuuid
	LastSeqNo          uint64 // last sequence number received before failover
}

// ObserveSeq sends an OBSERVE_SEQNO request carrying vbuuid for vbucket vb
// and decodes the response body into an ObserveSeqResult.
//
// A non-SUCCESS status or a body shorter than 27 bytes is reported as an
// error. OldVbuuid and LastSeqNo are filled in only when the failover byte is
// 1 and the body is at least 43 bytes long.
func (c *Client) ObserveSeq(vb uint16, vbuuid uint64) (result *ObserveSeqResult, err error) {
	// http://www.couchbase.com/wiki/display/couchbase/Observe
	body := make([]byte, 8)
	binary.BigEndian.PutUint64(body[0:8], vbuuid)

	res, err := c.Send(&gomemcached.MCRequest{
		Opcode:  gomemcached.OBSERVE_SEQNO,
		VBucket: vb,
		Body:    body,
		Opaque:  0x01,
	})
	if err != nil {
		return
	}

	if res.Status != gomemcached.SUCCESS {
		return nil, fmt.Errorf(" Observe returned error %v", res.Status)
	}

	// Parse the response data from the body:
	if len(res.Body) < (1 + 2 + 8 + 8 + 8) {
		err = io.ErrUnexpectedEOF
		return
	}

	result = &ObserveSeqResult{}
	result.Failover = res.Body[0]
	result.VbId = binary.BigEndian.Uint16(res.Body[1:3])
	result.Vbuuid = binary.BigEndian.Uint64(res.Body[3:11])
	result.LastPersistedSeqNo = binary.BigEndian.Uint64(res.Body[11:19])
	result.CurrentSeqNo = binary.BigEndian.Uint64(res.Body[19:27])

	// in case of failover processing we can have old vbuuid and the last persisted seq number
	if result.Failover == 1 && len(res.Body) >= (1+2+8+8+8+8+8) {
		result.OldVbuuid = binary.BigEndian.Uint64(res.Body[27:35])
		result.LastSeqNo = binary.BigEndian.Uint64(res.Body[35:43])
	}

	return
}

// CasOp is the type of operation to perform on this CAS loop.
type CasOp uint8

const (
	// CASStore instructs the server to store the new value normally
	CASStore = CasOp(iota)
	// CASQuit instructs the client to stop attempting to CAS, leaving value untouched
	CASQuit
	// CASDelete instructs the server to delete the current value
	CASDelete
)

// User specified termination is returned as an error.
//
// Error makes CasOp satisfy the error interface; it panics for a value other
// than the three declared constants.
func (c CasOp) Error() string {
	switch c {
	case CASStore:
		return "CAS store"
	case CASQuit:
		return "CAS quit"
	case CASDelete:
		return "CAS delete"
	}
	panic("Unhandled value")
}

//////// CAS TRANSFORM

// CASState tracks the state of CAS over several operations.
//
// This is used directly by CASNext and indirectly by CAS
type CASState struct {
	initialized bool   // false on the first call to CASNext, then true
	Value       []byte // Current value of key; update in place to new value
	Cas         uint64 // Current CAS value of key
	Exists      bool   // Does a value exist for the key? (If not, Value will be nil)
	Err         error  // Error, if any, after CASNext returns false
	resp        *gomemcached.MCResponse
}

// CASNext is a non-callback, loop-based version of CAS method.
//
// Usage is like this:
//
//	var state memcached.CASState
//	for client.CASNext(vb, key, exp, &state) {
//		state.Value = some_mutation(state.Value)
//	}
//	if state.Err != nil { ...
} +func (c *Client) CASNext(vb uint16, k string, exp int, state *CASState) bool { + if state.initialized { + if !state.Exists { + // Adding a new key: + if state.Value == nil { + state.Cas = 0 + return false // no-op (delete of non-existent value) + } + state.resp, state.Err = c.Add(vb, k, 0, exp, state.Value) + } else { + // Updating / deleting a key: + req := &gomemcached.MCRequest{ + Opcode: gomemcached.DELETE, + VBucket: vb, + Key: []byte(k), + Cas: state.Cas} + if state.Value != nil { + req.Opcode = gomemcached.SET + req.Opaque = 0 + req.Extras = []byte{0, 0, 0, 0, 0, 0, 0, 0} + req.Body = state.Value + + flags := 0 + binary.BigEndian.PutUint64(req.Extras, uint64(flags)<<32|uint64(exp)) + } + state.resp, state.Err = c.Send(req) + } + + // If the response status is KEY_EEXISTS or NOT_STORED there's a conflict and we'll need to + // get the new value (below). Otherwise, we're done (either success or failure) so return: + if !(state.resp != nil && (state.resp.Status == gomemcached.KEY_EEXISTS || + state.resp.Status == gomemcached.NOT_STORED)) { + state.Cas = state.resp.Cas + return false // either success or fatal error + } + } + + // Initial call, or after a conflict: GET the current value and CAS and return them: + state.initialized = true + if state.resp, state.Err = c.Get(vb, k); state.Err == nil { + state.Exists = true + state.Value = state.resp.Body + state.Cas = state.resp.Cas + } else if state.resp != nil && state.resp.Status == gomemcached.KEY_ENOENT { + state.Err = nil + state.Exists = false + state.Value = nil + state.Cas = 0 + } else { + return false // fatal error + } + return true // keep going... +} + +// CasFunc is type type of function to perform a CAS transform. +// +// Input is the current value, or nil if no value exists. +// The function should return the new value (if any) to set, and the store/quit/delete operation. +type CasFunc func(current []byte) ([]byte, CasOp) + +// CAS performs a CAS transform with the given function. 
+// +// If the value does not exist, a nil current value will be sent to f. +func (c *Client) CAS(vb uint16, k string, f CasFunc, + initexp int) (*gomemcached.MCResponse, error) { + var state CASState + for c.CASNext(vb, k, initexp, &state) { + newValue, operation := f(state.Value) + if operation == CASQuit || (operation == CASDelete && state.Value == nil) { + return nil, operation + } + state.Value = newValue + } + return state.resp, state.Err +} + +// StatValue is one of the stats returned from the Stats method. +type StatValue struct { + // The stat key + Key string + // The stat value + Val string +} + +// Stats requests server-side stats. +// +// Use "" as the stat key for toplevel stats. +func (c *Client) Stats(key string) ([]StatValue, error) { + rv := make([]StatValue, 0, 128) + + req := &gomemcached.MCRequest{ + Opcode: gomemcached.STAT, + Key: []byte(key), + Opaque: 918494, + } + + err := c.Transmit(req) + if err != nil { + return rv, err + } + + for { + res, _, err := getResponse(c.conn, c.hdrBuf) + if err != nil { + return rv, err + } + k := string(res.Key) + if k == "" { + break + } + rv = append(rv, StatValue{ + Key: k, + Val: string(res.Body), + }) + } + return rv, nil +} + +// StatsMap requests server-side stats similarly to Stats, but returns +// them as a map. +// +// Use "" as the stat key for toplevel stats. 
+func (c *Client) StatsMap(key string) (map[string]string, error) { + rv := make(map[string]string) + + req := &gomemcached.MCRequest{ + Opcode: gomemcached.STAT, + Key: []byte(key), + Opaque: 918494, + } + + err := c.Transmit(req) + if err != nil { + return rv, err + } + + for { + res, _, err := getResponse(c.conn, c.hdrBuf) + if err != nil { + return rv, err + } + k := string(res.Key) + if k == "" { + break + } + rv[k] = string(res.Body) + } + + return rv, nil +} + +// instead of returning a new statsMap, simply populate passed in statsMap, which contains all the keys +// for which stats needs to be retrieved +func (c *Client) StatsMapForSpecifiedStats(key string, statsMap map[string]string) error { + + // clear statsMap + for key, _ := range statsMap { + statsMap[key] = "" + } + + req := &gomemcached.MCRequest{ + Opcode: gomemcached.STAT, + Key: []byte(key), + Opaque: 918494, + } + + err := c.Transmit(req) + if err != nil { + return err + } + + for { + res, _, err := getResponse(c.conn, c.hdrBuf) + if err != nil { + return err + } + k := string(res.Key) + if k == "" { + break + } + if _, ok := statsMap[k]; ok { + statsMap[k] = string(res.Body) + } + } + + return nil +} + +// Hijack exposes the underlying connection from this client. +// +// It also marks the connection as unhealthy since the client will +// have lost control over the connection and can't otherwise verify +// things are in good shape for connection pools. 
+func (c *Client) Hijack() io.ReadWriteCloser { + c.setHealthy(false) + return c.conn +} + +func (c *Client) setHealthy(healthy bool) { + healthyState := UnHealthy + if healthy { + healthyState = Healthy + } + atomic.StoreUint32(&c.healthy, healthyState) +} + +func IfResStatusError(response *gomemcached.MCResponse) bool { + return response == nil || + (response.Status != gomemcached.SUBDOC_BAD_MULTI && + response.Status != gomemcached.SUBDOC_PATH_NOT_FOUND && + response.Status != gomemcached.SUBDOC_MULTI_PATH_FAILURE_DELETED) +} diff --git a/vendor/github.com/couchbase/gomemcached/client/tap_feed.go b/vendor/github.com/couchbase/gomemcached/client/tap_feed.go new file mode 100644 index 0000000000..fd628c5de2 --- /dev/null +++ b/vendor/github.com/couchbase/gomemcached/client/tap_feed.go @@ -0,0 +1,333 @@ +package memcached + +import ( + "bytes" + "encoding/binary" + "fmt" + "io" + "math" + + "github.com/couchbase/gomemcached" + "github.com/couchbase/goutils/logging" +) + +// TAP protocol docs: <http://www.couchbase.com/wiki/display/couchbase/TAP+Protocol> + +// TapOpcode is the tap operation type (found in TapEvent) +type TapOpcode uint8 + +// Tap opcode values. +const ( + TapBeginBackfill = TapOpcode(iota) + TapEndBackfill + TapMutation + TapDeletion + TapCheckpointStart + TapCheckpointEnd + tapEndStream +) + +const tapMutationExtraLen = 16 + +var tapOpcodeNames map[TapOpcode]string + +func init() { + tapOpcodeNames = map[TapOpcode]string{ + TapBeginBackfill: "BeginBackfill", + TapEndBackfill: "EndBackfill", + TapMutation: "Mutation", + TapDeletion: "Deletion", + TapCheckpointStart: "TapCheckpointStart", + TapCheckpointEnd: "TapCheckpointEnd", + tapEndStream: "EndStream", + } +} + +func (opcode TapOpcode) String() string { + name := tapOpcodeNames[opcode] + if name == "" { + name = fmt.Sprintf("#%d", opcode) + } + return name +} + +// TapEvent is a TAP notification of an operation on the server. 
+type TapEvent struct { + Opcode TapOpcode // Type of event + VBucket uint16 // VBucket this event applies to + Flags uint32 // Item flags + Expiry uint32 // Item expiration time + Key, Value []byte // Item key/value + Cas uint64 +} + +func makeTapEvent(req gomemcached.MCRequest) *TapEvent { + event := TapEvent{ + VBucket: req.VBucket, + } + switch req.Opcode { + case gomemcached.TAP_MUTATION: + event.Opcode = TapMutation + event.Key = req.Key + event.Value = req.Body + event.Cas = req.Cas + case gomemcached.TAP_DELETE: + event.Opcode = TapDeletion + event.Key = req.Key + event.Cas = req.Cas + case gomemcached.TAP_CHECKPOINT_START: + event.Opcode = TapCheckpointStart + case gomemcached.TAP_CHECKPOINT_END: + event.Opcode = TapCheckpointEnd + case gomemcached.TAP_OPAQUE: + if len(req.Extras) < 8+4 { + return nil + } + switch op := int(binary.BigEndian.Uint32(req.Extras[8:])); op { + case gomemcached.TAP_OPAQUE_INITIAL_VBUCKET_STREAM: + event.Opcode = TapBeginBackfill + case gomemcached.TAP_OPAQUE_CLOSE_BACKFILL: + event.Opcode = TapEndBackfill + case gomemcached.TAP_OPAQUE_CLOSE_TAP_STREAM: + event.Opcode = tapEndStream + case gomemcached.TAP_OPAQUE_ENABLE_AUTO_NACK: + return nil + case gomemcached.TAP_OPAQUE_ENABLE_CHECKPOINT_SYNC: + return nil + default: + logging.Infof("TapFeed: Ignoring TAP_OPAQUE/%d", op) + return nil // unknown opaque event + } + case gomemcached.NOOP: + return nil // ignore + default: + logging.Infof("TapFeed: Ignoring %s", req.Opcode) + return nil // unknown event + } + + if len(req.Extras) >= tapMutationExtraLen && + (event.Opcode == TapMutation || event.Opcode == TapDeletion) { + + event.Flags = binary.BigEndian.Uint32(req.Extras[8:]) + event.Expiry = binary.BigEndian.Uint32(req.Extras[12:]) + } + + return &event +} + +func (event TapEvent) String() string { + switch event.Opcode { + case TapBeginBackfill, TapEndBackfill, TapCheckpointStart, TapCheckpointEnd: + return fmt.Sprintf("<TapEvent %s, vbucket=%d>", + event.Opcode, event.VBucket) + 
default: + return fmt.Sprintf("<TapEvent %s, key=%q (%d bytes) flags=%x, exp=%d>", + event.Opcode, event.Key, len(event.Value), + event.Flags, event.Expiry) + } +} + +// TapArguments are parameters for requesting a TAP feed. +// +// Call DefaultTapArguments to get a default one. +type TapArguments struct { + // Timestamp of oldest item to send. + // + // Use TapNoBackfill to suppress all past items. + Backfill uint64 + // If set, server will disconnect after sending existing items. + Dump bool + // The indices of the vbuckets to watch; empty/nil to watch all. + VBuckets []uint16 + // Transfers ownership of vbuckets during cluster rebalance. + Takeover bool + // If true, server will wait for client ACK after every notification. + SupportAck bool + // If true, client doesn't want values so server shouldn't send them. + KeysOnly bool + // If true, client wants the server to send checkpoint events. + Checkpoint bool + // Optional identifier to use for this client, to allow reconnects + ClientName string + // Registers this client (by name) till explicitly deregistered. + RegisteredClient bool +} + +// Value for TapArguments.Backfill denoting that no past events at all +// should be sent. +const TapNoBackfill = math.MaxUint64 + +// DefaultTapArguments returns a default set of parameter values to +// pass to StartTapFeed. 
+func DefaultTapArguments() TapArguments { + return TapArguments{ + Backfill: TapNoBackfill, + } +} + +func (args *TapArguments) flags() []byte { + var flags gomemcached.TapConnectFlag + if args.Backfill != 0 { + flags |= gomemcached.BACKFILL + } + if args.Dump { + flags |= gomemcached.DUMP + } + if len(args.VBuckets) > 0 { + flags |= gomemcached.LIST_VBUCKETS + } + if args.Takeover { + flags |= gomemcached.TAKEOVER_VBUCKETS + } + if args.SupportAck { + flags |= gomemcached.SUPPORT_ACK + } + if args.KeysOnly { + flags |= gomemcached.REQUEST_KEYS_ONLY + } + if args.Checkpoint { + flags |= gomemcached.CHECKPOINT + } + if args.RegisteredClient { + flags |= gomemcached.REGISTERED_CLIENT + } + encoded := make([]byte, 4) + binary.BigEndian.PutUint32(encoded, uint32(flags)) + return encoded +} + +func must(err error) { + if err != nil { + panic(err) + } +} + +func (args *TapArguments) bytes() (rv []byte) { + buf := bytes.NewBuffer([]byte{}) + + if args.Backfill > 0 { + must(binary.Write(buf, binary.BigEndian, uint64(args.Backfill))) + } + + if len(args.VBuckets) > 0 { + must(binary.Write(buf, binary.BigEndian, uint16(len(args.VBuckets)))) + for i := 0; i < len(args.VBuckets); i++ { + must(binary.Write(buf, binary.BigEndian, uint16(args.VBuckets[i]))) + } + } + return buf.Bytes() +} + +// TapFeed represents a stream of events from a server. +type TapFeed struct { + C <-chan TapEvent + Error error + closer chan bool +} + +// StartTapFeed starts a TAP feed on a client connection. +// +// The events can be read from the returned channel. The connection +// can no longer be used for other purposes; it's now reserved for +// receiving the TAP messages. To stop receiving events, close the +// client connection. 
+func (mc *Client) StartTapFeed(args TapArguments) (*TapFeed, error) { + rq := &gomemcached.MCRequest{ + Opcode: gomemcached.TAP_CONNECT, + Key: []byte(args.ClientName), + Extras: args.flags(), + Body: args.bytes()} + + err := mc.Transmit(rq) + if err != nil { + return nil, err + } + + ch := make(chan TapEvent) + feed := &TapFeed{ + C: ch, + closer: make(chan bool), + } + go mc.runFeed(ch, feed) + return feed, nil +} + +// TapRecvHook is called after every incoming tap packet is received. +var TapRecvHook func(*gomemcached.MCRequest, int, error) + +// Internal goroutine that reads from the socket and writes events to +// the channel +func (mc *Client) runFeed(ch chan TapEvent, feed *TapFeed) { + defer close(ch) + var headerBuf [gomemcached.HDR_LEN]byte +loop: + for { + // Read the next request from the server. + // + // (Can't call mc.Receive() because it reads a + // _response_ not a request.) + var pkt gomemcached.MCRequest + n, err := pkt.Receive(mc.conn, headerBuf[:]) + if TapRecvHook != nil { + TapRecvHook(&pkt, n, err) + } + + if err != nil { + if err != io.EOF { + feed.Error = err + } + break loop + } + + //logging.Infof("** TapFeed received %#v : %q", pkt, pkt.Body) + + if pkt.Opcode == gomemcached.TAP_CONNECT { + // This is not an event from the server; it's + // an error response to my connect request. 
+ feed.Error = fmt.Errorf("tap connection failed: %s", pkt.Body) + break loop + } + + event := makeTapEvent(pkt) + if event != nil { + if event.Opcode == tapEndStream { + break loop + } + + select { + case ch <- *event: + case <-feed.closer: + break loop + } + } + + if len(pkt.Extras) >= 4 { + reqFlags := binary.BigEndian.Uint16(pkt.Extras[2:]) + if reqFlags&gomemcached.TAP_ACK != 0 { + if _, err := mc.sendAck(&pkt); err != nil { + feed.Error = err + break loop + } + } + } + } + if err := mc.Close(); err != nil { + logging.Errorf("Error closing memcached client: %v", err) + } +} + +func (mc *Client) sendAck(pkt *gomemcached.MCRequest) (int, error) { + res := gomemcached.MCResponse{ + Opcode: pkt.Opcode, + Opaque: pkt.Opaque, + Status: gomemcached.SUCCESS, + } + return res.Transmit(mc.conn) +} + +// Close terminates a TapFeed. +// +// Call this if you stop using a TapFeed before its channel ends. +func (feed *TapFeed) Close() { + close(feed.closer) +} diff --git a/vendor/github.com/couchbase/gomemcached/client/transport.go b/vendor/github.com/couchbase/gomemcached/client/transport.go new file mode 100644 index 0000000000..f4cea17fca --- /dev/null +++ b/vendor/github.com/couchbase/gomemcached/client/transport.go @@ -0,0 +1,67 @@ +package memcached + +import ( + "errors" + "io" + + "github.com/couchbase/gomemcached" +) + +var errNoConn = errors.New("no connection") + +// UnwrapMemcachedError converts memcached errors to normal responses. +// +// If the error is a memcached response, declare the error to be nil +// so a client can handle the status without worrying about whether it +// indicates success or failure. 
+func UnwrapMemcachedError(rv *gomemcached.MCResponse, + err error) (*gomemcached.MCResponse, error) { + + if rv == err { + return rv, nil + } + return rv, err +} + +// ReceiveHook is called after every packet is received (or attempted to be) +var ReceiveHook func(*gomemcached.MCResponse, int, error) + +func getResponse(s io.Reader, hdrBytes []byte) (rv *gomemcached.MCResponse, n int, err error) { + if s == nil { + return nil, 0, errNoConn + } + + rv = &gomemcached.MCResponse{} + n, err = rv.Receive(s, hdrBytes) + + if ReceiveHook != nil { + ReceiveHook(rv, n, err) + } + + if err == nil && (rv.Status != gomemcached.SUCCESS && rv.Status != gomemcached.AUTH_CONTINUE) { + err = rv + } + return rv, n, err +} + +// TransmitHook is called after each packet is transmitted. +var TransmitHook func(*gomemcached.MCRequest, int, error) + +func transmitRequest(o io.Writer, req *gomemcached.MCRequest) (int, error) { + if o == nil { + return 0, errNoConn + } + n, err := req.Transmit(o) + if TransmitHook != nil { + TransmitHook(req, n, err) + } + return n, err +} + +func transmitResponse(o io.Writer, res *gomemcached.MCResponse) (int, error) { + if o == nil { + return 0, errNoConn + } + n, err := res.Transmit(o) + return n, err +} diff --git a/vendor/github.com/couchbase/gomemcached/client/upr_feed.go b/vendor/github.com/couchbase/gomemcached/client/upr_feed.go new file mode 100644 index 0000000000..dc737e6cc0 --- /dev/null +++ b/vendor/github.com/couchbase/gomemcached/client/upr_feed.go @@ -0,0 +1,1005 @@ +// go implementation of upr client. +// See https://github.com/couchbaselabs/cbupr/blob/master/transport-spec.md +// TODO +// 1. 
Use a pool allocator to avoid garbage +package memcached + +import ( + "encoding/binary" + "errors" + "fmt" + "github.com/couchbase/gomemcached" + "github.com/couchbase/goutils/logging" + "strconv" + "sync" + "sync/atomic" +) + +const uprMutationExtraLen = 30 +const uprDeletetionExtraLen = 18 +const uprDeletetionWithDeletionTimeExtraLen = 21 +const uprSnapshotExtraLen = 20 +const bufferAckThreshold = 0.2 +const opaqueOpen = 0xBEAF0001 +const opaqueFailover = 0xDEADBEEF +const uprDefaultNoopInterval = 120 + +// Counter on top of opaqueOpen that others can draw from for open and control msgs +var opaqueOpenCtrlWell uint32 = opaqueOpen + +// UprEvent memcached events for UPR streams. +type UprEvent struct { + Opcode gomemcached.CommandCode // Type of event + Status gomemcached.Status // Response status + VBucket uint16 // VBucket this event applies to + DataType uint8 // data type + Opaque uint16 // 16 MSB of opaque + VBuuid uint64 // This field is set by downstream + Flags uint32 // Item flags + Expiry uint32 // Item expiration time + Key, Value []byte // Item key/value + OldValue []byte // TODO: TBD: old document value + Cas uint64 // CAS value of the item + Seqno uint64 // sequence number of the mutation + RevSeqno uint64 // rev sequence number : deletions + LockTime uint32 // Lock time + MetadataSize uint16 // Metadata size + SnapstartSeq uint64 // start sequence number of this snapshot + SnapendSeq uint64 // End sequence number of the snapshot + SnapshotType uint32 // 0: disk 1: memory + FailoverLog *FailoverLog // Failover log containing vvuid and sequnce number + Error error // Error value in case of a failure + ExtMeta []byte + AckSize uint32 // The number of bytes that can be Acked to DCP +} + +// UprStream is per stream data structure over an UPR Connection. 
+type UprStream struct { + Vbucket uint16 // Vbucket id + Vbuuid uint64 // vbucket uuid + StartSeq uint64 // start sequence number + EndSeq uint64 // end sequence number + connected bool +} + +const ( + CompressionTypeStartMarker = iota // also means invalid + CompressionTypeNone = iota + CompressionTypeSnappy = iota + CompressionTypeEndMarker = iota // also means invalid +) + +// kv_engine/include/mcbp/protocol/datatype.h +const ( + JSONDataType uint8 = 1 + SnappyDataType uint8 = 2 + XattrDataType uint8 = 4 +) + +type UprFeatures struct { + Xattribute bool + CompressionType int + IncludeDeletionTime bool +} + +/** + * Used to handle multiple concurrent calls UprRequestStream() by UprFeed clients + * It is expected that a client that calls UprRequestStream() more than once should issue + * different "opaque" (version) numbers + */ +type opaqueStreamMap map[uint16]*UprStream // opaque -> stream + +type vbStreamNegotiator struct { + vbHandshakeMap map[uint16]opaqueStreamMap // vbno -> opaqueStreamMap + mutex sync.RWMutex +} + +func (negotiator *vbStreamNegotiator) initialize() { + negotiator.mutex.Lock() + negotiator.vbHandshakeMap = make(map[uint16]opaqueStreamMap) + negotiator.mutex.Unlock() +} + +func (negotiator *vbStreamNegotiator) registerRequest(vbno, opaque uint16, vbuuid, startSequence, endSequence uint64) { + negotiator.mutex.Lock() + defer negotiator.mutex.Unlock() + + var osMap opaqueStreamMap + var ok bool + if osMap, ok = negotiator.vbHandshakeMap[vbno]; !ok { + osMap = make(opaqueStreamMap) + negotiator.vbHandshakeMap[vbno] = osMap + } + + if _, ok = osMap[opaque]; !ok { + osMap[opaque] = &UprStream{ + Vbucket: vbno, + Vbuuid: vbuuid, + StartSeq: startSequence, + EndSeq: endSequence, + } + } +} + +func (negotiator *vbStreamNegotiator) getStreamsCntFromMap(vbno uint16) int { + negotiator.mutex.RLock() + defer negotiator.mutex.RUnlock() + + osmap, ok := negotiator.vbHandshakeMap[vbno] + if !ok { + return 0 + } else { + return len(osmap) + } +} + +func 
(negotiator *vbStreamNegotiator) getStreamFromMap(vbno, opaque uint16) (*UprStream, error) { + negotiator.mutex.RLock() + defer negotiator.mutex.RUnlock() + + osmap, ok := negotiator.vbHandshakeMap[vbno] + if !ok { + return nil, fmt.Errorf("Error: stream for vb: %v does not exist", vbno) + } + + stream, ok := osmap[opaque] + if !ok { + return nil, fmt.Errorf("Error: stream for vb: %v opaque: %v does not exist", vbno, opaque) + } + return stream, nil +} + +func (negotiator *vbStreamNegotiator) deleteStreamFromMap(vbno, opaque uint16) { + negotiator.mutex.Lock() + defer negotiator.mutex.Unlock() + + osmap, ok := negotiator.vbHandshakeMap[vbno] + if !ok { + return + } + + delete(osmap, opaque) + if len(osmap) == 0 { + delete(negotiator.vbHandshakeMap, vbno) + } +} + +func (negotiator *vbStreamNegotiator) handleStreamRequest(feed *UprFeed, + headerBuf [gomemcached.HDR_LEN]byte, pktPtr *gomemcached.MCRequest, bytesReceivedFromDCP int, + response *gomemcached.MCResponse) (*UprEvent, error) { + var event *UprEvent + + if feed == nil || response == nil || pktPtr == nil { + return nil, errors.New("Invalid inputs") + } + + // Get Stream from negotiator map + vbno := vbOpaque(response.Opaque) + opaque := appOpaque(response.Opaque) + + stream, err := negotiator.getStreamFromMap(vbno, opaque) + if err != nil { + err = fmt.Errorf("Stream not found for vb %d appOpaque %v: %#v", vbno, appOpaque, *pktPtr) + logging.Errorf(err.Error()) + return nil, err + } + + status, rb, flog, err := handleStreamRequest(response, headerBuf[:]) + + if status == gomemcached.ROLLBACK { + event = makeUprEvent(*pktPtr, stream, bytesReceivedFromDCP) + event.Status = status + // rollback stream + logging.Infof("UPR_STREAMREQ with rollback %d for vb %d Failed: %v", rb, vbno, err) + negotiator.deleteStreamFromMap(vbno, opaque) + } else if status == gomemcached.SUCCESS { + event = makeUprEvent(*pktPtr, stream, bytesReceivedFromDCP) + event.Seqno = stream.StartSeq + event.FailoverLog = flog + event.Status = 
status + feed.activateStream(vbno, opaque, stream) + feed.negotiator.deleteStreamFromMap(vbno, opaque) + logging.Infof("UPR_STREAMREQ for vb %d successful", vbno) + + } else if err != nil { + logging.Errorf("UPR_STREAMREQ for vbucket %d erro %s", vbno, err.Error()) + event = &UprEvent{ + Opcode: gomemcached.UPR_STREAMREQ, + Status: status, + VBucket: vbno, + Error: err, + } + negotiator.deleteStreamFromMap(vbno, opaque) + } + return event, nil +} + +func (negotiator *vbStreamNegotiator) cleanUpVbStreams(vbno uint16) { + negotiator.mutex.Lock() + defer negotiator.mutex.Unlock() + + delete(negotiator.vbHandshakeMap, vbno) +} + +// UprFeed represents an UPR feed. A feed contains a connection to a single +// host and multiple vBuckets +type UprFeed struct { + // lock for feed.vbstreams + muVbstreams sync.RWMutex + // lock for feed.closed + muClosed sync.RWMutex + C <-chan *UprEvent // Exported channel for receiving UPR events + negotiator vbStreamNegotiator // Used for pre-vbstreams, concurrent vb stream negotiation + vbstreams map[uint16]*UprStream // official live vb->stream mapping + closer chan bool // closer + conn *Client // connection to UPR producer + Error error // error + bytesRead uint64 // total bytes read on this connection + toAckBytes uint32 // bytes client has read + maxAckBytes uint32 // Max buffer control ack bytes + stats UprStats // Stats for upr client + transmitCh chan *gomemcached.MCRequest // transmit command channel + transmitCl chan bool // closer channel for transmit go-routine + closed bool // flag indicating whether the feed has been closed + // flag indicating whether client of upr feed will send ack to upr feed + // if flag is true, upr feed will use ack from client to determine whether/when to send ack to DCP + // if flag is false, upr feed will track how many bytes it has sent to client + // and use that to determine whether/when to send ack to DCP + ackByClient bool +} + +// Exported interface - to allow for mocking +type UprFeedIface 
interface { + Close() + Closed() bool + CloseStream(vbno, opaqueMSB uint16) error + GetError() error + GetUprStats() *UprStats + ClientAck(event *UprEvent) error + GetUprEventCh() <-chan *UprEvent + StartFeed() error + StartFeedWithConfig(datachan_len int) error + UprOpen(name string, sequence uint32, bufSize uint32) error + UprOpenWithXATTR(name string, sequence uint32, bufSize uint32) error + UprOpenWithFeatures(name string, sequence uint32, bufSize uint32, features UprFeatures) (error, UprFeatures) + UprRequestStream(vbno, opaqueMSB uint16, flags uint32, vuuid, startSequence, endSequence, snapStart, snapEnd uint64) error +} + +type UprStats struct { + TotalBytes uint64 + TotalMutation uint64 + TotalBufferAckSent uint64 + TotalSnapShot uint64 +} + +// FailoverLog containing vvuid and sequnce number +type FailoverLog [][2]uint64 + +// error codes +var ErrorInvalidLog = errors.New("couchbase.errorInvalidLog") + +func (flogp *FailoverLog) Latest() (vbuuid, seqno uint64, err error) { + if flogp != nil { + flog := *flogp + latest := flog[len(flog)-1] + return latest[0], latest[1], nil + } + return vbuuid, seqno, ErrorInvalidLog +} + +func makeUprEvent(rq gomemcached.MCRequest, stream *UprStream, bytesReceivedFromDCP int) *UprEvent { + event := &UprEvent{ + Opcode: rq.Opcode, + VBucket: stream.Vbucket, + VBuuid: stream.Vbuuid, + Key: rq.Key, + Value: rq.Body, + Cas: rq.Cas, + ExtMeta: rq.ExtMeta, + DataType: rq.DataType, + } + + // set AckSize for events that need to be acked to DCP, + // i.e., events with CommandCodes that need to be buffered in DCP + if _, ok := gomemcached.BufferedCommandCodeMap[rq.Opcode]; ok { + event.AckSize = uint32(bytesReceivedFromDCP) + } + + // 16 LSBits are used by client library to encode vbucket number. + // 16 MSBits are left for application to multiplex on opaque value. 
+ event.Opaque = appOpaque(rq.Opaque) + + if len(rq.Extras) >= uprMutationExtraLen && + event.Opcode == gomemcached.UPR_MUTATION { + + event.Seqno = binary.BigEndian.Uint64(rq.Extras[:8]) + event.RevSeqno = binary.BigEndian.Uint64(rq.Extras[8:16]) + event.Flags = binary.BigEndian.Uint32(rq.Extras[16:20]) + event.Expiry = binary.BigEndian.Uint32(rq.Extras[20:24]) + event.LockTime = binary.BigEndian.Uint32(rq.Extras[24:28]) + event.MetadataSize = binary.BigEndian.Uint16(rq.Extras[28:30]) + + } else if len(rq.Extras) >= uprDeletetionWithDeletionTimeExtraLen && + event.Opcode == gomemcached.UPR_DELETION { + + event.Seqno = binary.BigEndian.Uint64(rq.Extras[:8]) + event.RevSeqno = binary.BigEndian.Uint64(rq.Extras[8:16]) + event.Expiry = binary.BigEndian.Uint32(rq.Extras[16:20]) + + } else if len(rq.Extras) >= uprDeletetionExtraLen && + event.Opcode == gomemcached.UPR_DELETION || + event.Opcode == gomemcached.UPR_EXPIRATION { + + event.Seqno = binary.BigEndian.Uint64(rq.Extras[:8]) + event.RevSeqno = binary.BigEndian.Uint64(rq.Extras[8:16]) + event.MetadataSize = binary.BigEndian.Uint16(rq.Extras[16:18]) + + } else if len(rq.Extras) >= uprSnapshotExtraLen && + event.Opcode == gomemcached.UPR_SNAPSHOT { + + event.SnapstartSeq = binary.BigEndian.Uint64(rq.Extras[:8]) + event.SnapendSeq = binary.BigEndian.Uint64(rq.Extras[8:16]) + event.SnapshotType = binary.BigEndian.Uint32(rq.Extras[16:20]) + } + + return event +} + +func (event *UprEvent) String() string { + name := gomemcached.CommandNames[event.Opcode] + if name == "" { + name = fmt.Sprintf("#%d", event.Opcode) + } + return name +} + +func (event *UprEvent) IsSnappyDataType() bool { + return event.Opcode == gomemcached.UPR_MUTATION && (event.DataType&SnappyDataType > 0) +} + +func (feed *UprFeed) sendCommands(mc *Client) { + transmitCh := feed.transmitCh + transmitCl := feed.transmitCl +loop: + for { + select { + case command := <-transmitCh: + if err := mc.Transmit(command); err != nil { + logging.Errorf("Failed to 
transmit command %s. Error %s", command.Opcode.String(), err.Error()) + // get feed to close and runFeed routine to exit + feed.Close() + break loop + } + + case <-transmitCl: + break loop + } + } + + // After sendCommands exits, write to transmitCh will block forever + // when we write to transmitCh, e.g., at CloseStream(), we need to check feed closure to have an exit route + + logging.Infof("sendCommands exiting") +} + +// Sets the specified stream as the connected stream for this vbno, and also cleans up negotiator +func (feed *UprFeed) activateStream(vbno, opaque uint16, stream *UprStream) error { + feed.muVbstreams.Lock() + defer feed.muVbstreams.Unlock() + + // Set this stream as the officially connected stream for this vb + stream.connected = true + feed.vbstreams[vbno] = stream + return nil +} + +func (feed *UprFeed) cleanUpVbStream(vbno uint16) { + feed.muVbstreams.Lock() + defer feed.muVbstreams.Unlock() + + delete(feed.vbstreams, vbno) +} + +// NewUprFeed creates a new UPR Feed. +// TODO: Describe side-effects on bucket instance and its connection pool. 
+func (mc *Client) NewUprFeed() (*UprFeed, error) { + return mc.NewUprFeedWithConfig(false /*ackByClient*/) +} + +func (mc *Client) NewUprFeedWithConfig(ackByClient bool) (*UprFeed, error) { + + feed := &UprFeed{ + conn: mc, + closer: make(chan bool, 1), + vbstreams: make(map[uint16]*UprStream), + transmitCh: make(chan *gomemcached.MCRequest), + transmitCl: make(chan bool), + ackByClient: ackByClient, + } + + feed.negotiator.initialize() + + go feed.sendCommands(mc) + return feed, nil +} + +func (mc *Client) NewUprFeedIface() (UprFeedIface, error) { + return mc.NewUprFeed() +} + +func (mc *Client) NewUprFeedWithConfigIface(ackByClient bool) (UprFeedIface, error) { + return mc.NewUprFeedWithConfig(ackByClient) +} + +func doUprOpen(mc *Client, name string, sequence uint32, features UprFeatures) error { + rq := &gomemcached.MCRequest{ + Opcode: gomemcached.UPR_OPEN, + Key: []byte(name), + Opaque: getUprOpenCtrlOpaque(), + } + + rq.Extras = make([]byte, 8) + binary.BigEndian.PutUint32(rq.Extras[:4], sequence) + + // opens a producer type connection + flags := gomemcached.DCP_PRODUCER + if features.Xattribute { + flags = flags | gomemcached.DCP_OPEN_INCLUDE_XATTRS + } + if features.IncludeDeletionTime { + flags = flags | gomemcached.DCP_OPEN_INCLUDE_DELETE_TIMES + } + binary.BigEndian.PutUint32(rq.Extras[4:], flags) + + return sendMcRequestSync(mc, rq) +} + +// Synchronously send a memcached request and wait for the response +func sendMcRequestSync(mc *Client, req *gomemcached.MCRequest) error { + if err := mc.Transmit(req); err != nil { + return err + } + + if res, err := mc.Receive(); err != nil { + return err + } else if req.Opcode != res.Opcode { + return fmt.Errorf("unexpected #opcode sent %v received %v", req.Opcode, res.Opaque) + } else if req.Opaque != res.Opaque { + return fmt.Errorf("opaque mismatch, sent %v received %v", req.Opaque, res.Opaque) + } else if res.Status != gomemcached.SUCCESS { + return fmt.Errorf("error %v", res.Status) + } + return nil +} + 
+// UprOpen to connect with a UPR producer. +// Name: name of te UPR connection +// sequence: sequence number for the connection +// bufsize: max size of the application +func (feed *UprFeed) UprOpen(name string, sequence uint32, bufSize uint32) error { + var allFeaturesDisabled UprFeatures + err, _ := feed.uprOpen(name, sequence, bufSize, allFeaturesDisabled) + return err +} + +// UprOpen with XATTR enabled. +func (feed *UprFeed) UprOpenWithXATTR(name string, sequence uint32, bufSize uint32) error { + var onlyXattrEnabled UprFeatures + onlyXattrEnabled.Xattribute = true + err, _ := feed.uprOpen(name, sequence, bufSize, onlyXattrEnabled) + return err +} + +func (feed *UprFeed) UprOpenWithFeatures(name string, sequence uint32, bufSize uint32, features UprFeatures) (error, UprFeatures) { + return feed.uprOpen(name, sequence, bufSize, features) +} + +func (feed *UprFeed) uprOpen(name string, sequence uint32, bufSize uint32, features UprFeatures) (err error, activatedFeatures UprFeatures) { + mc := feed.conn + + // First set this to an invalid value to state that the method hasn't gotten to executing this control yet + activatedFeatures.CompressionType = CompressionTypeEndMarker + + if err = doUprOpen(mc, name, sequence, features); err != nil { + return + } + + activatedFeatures.Xattribute = features.Xattribute + + // send a UPR control message to set the window size for the this connection + if bufSize > 0 { + rq := &gomemcached.MCRequest{ + Opcode: gomemcached.UPR_CONTROL, + Key: []byte("connection_buffer_size"), + Body: []byte(strconv.Itoa(int(bufSize))), + Opaque: getUprOpenCtrlOpaque(), + } + err = sendMcRequestSync(feed.conn, rq) + if err != nil { + return + } + feed.maxAckBytes = uint32(bufferAckThreshold * float32(bufSize)) + } + + // enable noop and set noop interval + rq := &gomemcached.MCRequest{ + Opcode: gomemcached.UPR_CONTROL, + Key: []byte("enable_noop"), + Body: []byte("true"), + Opaque: getUprOpenCtrlOpaque(), + } + err = sendMcRequestSync(feed.conn, 
rq) + if err != nil { + return + } + + rq = &gomemcached.MCRequest{ + Opcode: gomemcached.UPR_CONTROL, + Key: []byte("set_noop_interval"), + Body: []byte(strconv.Itoa(int(uprDefaultNoopInterval))), + Opaque: getUprOpenCtrlOpaque(), + } + err = sendMcRequestSync(feed.conn, rq) + if err != nil { + return + } + + if features.CompressionType == CompressionTypeSnappy { + activatedFeatures.CompressionType = CompressionTypeNone + rq = &gomemcached.MCRequest{ + Opcode: gomemcached.UPR_CONTROL, + Key: []byte("force_value_compression"), + Body: []byte("true"), + Opaque: getUprOpenCtrlOpaque(), + } + err = sendMcRequestSync(feed.conn, rq) + } else if features.CompressionType == CompressionTypeEndMarker { + err = fmt.Errorf("UPR_CONTROL Failed - Invalid CompressionType: %v", features.CompressionType) + } + if err != nil { + return + } + activatedFeatures.CompressionType = features.CompressionType + + return +} + +// UprGetFailoverLog for given list of vbuckets. +func (mc *Client) UprGetFailoverLog( + vb []uint16) (map[uint16]*FailoverLog, error) { + + rq := &gomemcached.MCRequest{ + Opcode: gomemcached.UPR_FAILOVERLOG, + Opaque: opaqueFailover, + } + + var allFeaturesDisabled UprFeatures + if err := doUprOpen(mc, "FailoverLog", 0, allFeaturesDisabled); err != nil { + return nil, fmt.Errorf("UPR_OPEN Failed %s", err.Error()) + } + + failoverLogs := make(map[uint16]*FailoverLog) + for _, vBucket := range vb { + rq.VBucket = vBucket + if err := mc.Transmit(rq); err != nil { + return nil, err + } + res, err := mc.Receive() + + if err != nil { + return nil, fmt.Errorf("failed to receive %s", err.Error()) + } else if res.Opcode != gomemcached.UPR_FAILOVERLOG || res.Status != gomemcached.SUCCESS { + return nil, fmt.Errorf("unexpected #opcode %v", res.Opcode) + } + + flog, err := parseFailoverLog(res.Body) + if err != nil { + return nil, fmt.Errorf("unable to parse failover logs for vb %d", vb) + } + failoverLogs[vBucket] = flog + } + + return failoverLogs, nil +} + +// 
UprRequestStream for a single vbucket. +func (feed *UprFeed) UprRequestStream(vbno, opaqueMSB uint16, flags uint32, + vuuid, startSequence, endSequence, snapStart, snapEnd uint64) error { + + rq := &gomemcached.MCRequest{ + Opcode: gomemcached.UPR_STREAMREQ, + VBucket: vbno, + Opaque: composeOpaque(vbno, opaqueMSB), + } + + rq.Extras = make([]byte, 48) // #Extras + binary.BigEndian.PutUint32(rq.Extras[:4], flags) + binary.BigEndian.PutUint32(rq.Extras[4:8], uint32(0)) + binary.BigEndian.PutUint64(rq.Extras[8:16], startSequence) + binary.BigEndian.PutUint64(rq.Extras[16:24], endSequence) + binary.BigEndian.PutUint64(rq.Extras[24:32], vuuid) + binary.BigEndian.PutUint64(rq.Extras[32:40], snapStart) + binary.BigEndian.PutUint64(rq.Extras[40:48], snapEnd) + + feed.negotiator.registerRequest(vbno, opaqueMSB, vuuid, startSequence, endSequence) + // Any client that has ever called this method, regardless of return code, + // should expect a potential UPR_CLOSESTREAM message due to this new map entry prior to Transmit. + + if err := feed.conn.Transmit(rq); err != nil { + logging.Errorf("Error in StreamRequest %s", err.Error()) + // If an error occurs during transmit, then the UPRFeed will keep the stream + // in the vbstreams map. This is to prevent nil lookup from any previously + // sent stream requests. + return err + } + + return nil +} + +// CloseStream for specified vbucket. 
+func (feed *UprFeed) CloseStream(vbno, opaqueMSB uint16) error { + + err := feed.validateCloseStream(vbno) + if err != nil { + logging.Infof("CloseStream for %v has been skipped because of error %v", vbno, err) + return err + } + + closeStream := &gomemcached.MCRequest{ + Opcode: gomemcached.UPR_CLOSESTREAM, + VBucket: vbno, + Opaque: composeOpaque(vbno, opaqueMSB), + } + + feed.writeToTransmitCh(closeStream) + + return nil +} + +func (feed *UprFeed) GetUprEventCh() <-chan *UprEvent { + return feed.C +} + +func (feed *UprFeed) GetError() error { + return feed.Error +} + +func (feed *UprFeed) validateCloseStream(vbno uint16) error { + feed.muVbstreams.RLock() + nilVbStream := feed.vbstreams[vbno] == nil + feed.muVbstreams.RUnlock() + + if nilVbStream && (feed.negotiator.getStreamsCntFromMap(vbno) == 0) { + return fmt.Errorf("Stream for vb %d has not been requested", vbno) + } + + return nil +} + +func (feed *UprFeed) writeToTransmitCh(rq *gomemcached.MCRequest) error { + // write to transmitCh may block forever if sendCommands has exited + // check for feed closure to have an exit route in this case + select { + case <-feed.closer: + errMsg := fmt.Sprintf("Abort sending request to transmitCh because feed has been closed. request=%v", rq) + logging.Infof(errMsg) + return errors.New(errMsg) + case feed.transmitCh <- rq: + } + return nil +} + +// StartFeed to start the upper feed. 
+func (feed *UprFeed) StartFeed() error { + return feed.StartFeedWithConfig(10) +} + +func (feed *UprFeed) StartFeedWithConfig(datachan_len int) error { + ch := make(chan *UprEvent, datachan_len) + feed.C = ch + go feed.runFeed(ch) + return nil +} + +func parseFailoverLog(body []byte) (*FailoverLog, error) { + + if len(body)%16 != 0 { + err := fmt.Errorf("invalid body length %v, in failover-log", len(body)) + return nil, err + } + log := make(FailoverLog, len(body)/16) + for i, j := 0, 0; i < len(body); i += 16 { + vuuid := binary.BigEndian.Uint64(body[i : i+8]) + seqno := binary.BigEndian.Uint64(body[i+8 : i+16]) + log[j] = [2]uint64{vuuid, seqno} + j++ + } + return &log, nil +} + +func handleStreamRequest( + res *gomemcached.MCResponse, + headerBuf []byte, +) (gomemcached.Status, uint64, *FailoverLog, error) { + + var rollback uint64 + var err error + + switch { + case res.Status == gomemcached.ROLLBACK: + logging.Infof("Rollback response. body=%v, headerBuf=%v\n", res.Body, headerBuf) + rollback = binary.BigEndian.Uint64(res.Body) + logging.Infof("Rollback seqno is %v for response with opaque %v\n", rollback, res.Opaque) + return res.Status, rollback, nil, nil + + case res.Status != gomemcached.SUCCESS: + err = fmt.Errorf("unexpected status %v for response with opaque %v", res.Status, res.Opaque) + return res.Status, 0, nil, err + } + + flog, err := parseFailoverLog(res.Body[:]) + return res.Status, rollback, flog, err +} + +// generate stream end responses for all active vb streams +func (feed *UprFeed) doStreamClose(ch chan *UprEvent) { + feed.muVbstreams.RLock() + + uprEvents := make([]*UprEvent, len(feed.vbstreams)) + index := 0 + for vbno, stream := range feed.vbstreams { + uprEvent := &UprEvent{ + VBucket: vbno, + VBuuid: stream.Vbuuid, + Opcode: gomemcached.UPR_STREAMEND, + } + uprEvents[index] = uprEvent + index++ + } + + // release the lock before sending uprEvents to ch, which may block + feed.muVbstreams.RUnlock() + +loop: + for _, uprEvent := range 
uprEvents { + select { + case ch <- uprEvent: + case <-feed.closer: + logging.Infof("Feed has been closed. Aborting doStreamClose.") + break loop + } + } +} + +func (feed *UprFeed) runFeed(ch chan *UprEvent) { + defer close(ch) + var headerBuf [gomemcached.HDR_LEN]byte + var pkt gomemcached.MCRequest + var event *UprEvent + + mc := feed.conn.Hijack() + uprStats := &feed.stats + +loop: + for { + select { + case <-feed.closer: + logging.Infof("Feed has been closed. Exiting.") + break loop + default: + bytes, err := pkt.Receive(mc, headerBuf[:]) + if err != nil { + logging.Errorf("Error in receive %s", err.Error()) + feed.Error = err + // send all the stream close messages to the client + feed.doStreamClose(ch) + break loop + } else { + event = nil + res := &gomemcached.MCResponse{ + Opcode: pkt.Opcode, + Cas: pkt.Cas, + Opaque: pkt.Opaque, + Status: gomemcached.Status(pkt.VBucket), + Extras: pkt.Extras, + Key: pkt.Key, + Body: pkt.Body, + } + + vb := vbOpaque(pkt.Opaque) + appOpaque := appOpaque(pkt.Opaque) + uprStats.TotalBytes = uint64(bytes) + + feed.muVbstreams.RLock() + stream := feed.vbstreams[vb] + feed.muVbstreams.RUnlock() + + switch pkt.Opcode { + case gomemcached.UPR_STREAMREQ: + event, err = feed.negotiator.handleStreamRequest(feed, headerBuf, &pkt, bytes, res) + if err != nil { + logging.Infof(err.Error()) + break loop + } + case gomemcached.UPR_MUTATION, + gomemcached.UPR_DELETION, + gomemcached.UPR_EXPIRATION: + if stream == nil { + logging.Infof("Stream not found for vb %d: %#v", vb, pkt) + break loop + } + event = makeUprEvent(pkt, stream, bytes) + uprStats.TotalMutation++ + + case gomemcached.UPR_STREAMEND: + if stream == nil { + logging.Infof("Stream not found for vb %d: %#v", vb, pkt) + break loop + } + //stream has ended + event = makeUprEvent(pkt, stream, bytes) + logging.Infof("Stream Ended for vb %d", vb) + + feed.negotiator.deleteStreamFromMap(vb, appOpaque) + feed.cleanUpVbStream(vb) + + case gomemcached.UPR_SNAPSHOT: + if stream == nil { + 
logging.Infof("Stream not found for vb %d: %#v", vb, pkt) + break loop + } + // snapshot marker + event = makeUprEvent(pkt, stream, bytes) + uprStats.TotalSnapShot++ + + case gomemcached.UPR_FLUSH: + if stream == nil { + logging.Infof("Stream not found for vb %d: %#v", vb, pkt) + break loop + } + // special processing for flush ? + event = makeUprEvent(pkt, stream, bytes) + + case gomemcached.UPR_CLOSESTREAM: + if stream == nil { + logging.Infof("Stream not found for vb %d: %#v", vb, pkt) + break loop + } + event = makeUprEvent(pkt, stream, bytes) + event.Opcode = gomemcached.UPR_STREAMEND // opcode re-write !! + logging.Infof("Stream Closed for vb %d StreamEnd simulated", vb) + + feed.negotiator.deleteStreamFromMap(vb, appOpaque) + feed.cleanUpVbStream(vb) + + case gomemcached.UPR_ADDSTREAM: + logging.Infof("Opcode %v not implemented", pkt.Opcode) + + case gomemcached.UPR_CONTROL, gomemcached.UPR_BUFFERACK: + if res.Status != gomemcached.SUCCESS { + logging.Infof("Opcode %v received status %d", pkt.Opcode.String(), res.Status) + } + + case gomemcached.UPR_NOOP: + // send a NOOP back + noop := &gomemcached.MCResponse{ + Opcode: gomemcached.UPR_NOOP, + Opaque: pkt.Opaque, + } + + if err := feed.conn.TransmitResponse(noop); err != nil { + logging.Warnf("failed to transmit command %s. Error %s", noop.Opcode.String(), err.Error()) + } + default: + logging.Infof("Recived an unknown response for vbucket %d", vb) + } + } + + if event != nil { + select { + case ch <- event: + case <-feed.closer: + logging.Infof("Feed has been closed. Skip sending events. 
Exiting.") + break loop + } + + feed.muVbstreams.RLock() + l := len(feed.vbstreams) + feed.muVbstreams.RUnlock() + + if event.Opcode == gomemcached.UPR_CLOSESTREAM && l == 0 { + logging.Infof("No more streams") + } + } + + if !feed.ackByClient { + // if client does not ack, do the ack check now + feed.sendBufferAckIfNeeded(event) + } + } + } + + // make sure that feed is closed before we signal transmitCl and exit runFeed + feed.Close() + + close(feed.transmitCl) + logging.Infof("runFeed exiting") +} + +// Client, after completing processing of an UprEvent, need to call this API to notify UprFeed, +// so that UprFeed can update its ack bytes stats and send ack to DCP if needed +// Client needs to set ackByClient flag to true in NewUprFeedWithConfig() call as a prerequisite for this call to work +// This API is not thread safe. Caller should NOT have more than one go rountine calling this API +func (feed *UprFeed) ClientAck(event *UprEvent) error { + if !feed.ackByClient { + return errors.New("Upr feed does not have ackByclient flag set") + } + feed.sendBufferAckIfNeeded(event) + return nil +} + +// increment ack bytes if the event needs to be acked to DCP +// send buffer ack if enough ack bytes have been accumulated +func (feed *UprFeed) sendBufferAckIfNeeded(event *UprEvent) { + if event == nil || event.AckSize == 0 { + // this indicates that there is no need to ack to DCP + return + } + + totalBytes := feed.toAckBytes + event.AckSize + if totalBytes > feed.maxAckBytes { + feed.toAckBytes = 0 + feed.sendBufferAck(totalBytes) + } else { + feed.toAckBytes = totalBytes + } +} + +// send buffer ack to dcp +func (feed *UprFeed) sendBufferAck(sendSize uint32) { + bufferAck := &gomemcached.MCRequest{ + Opcode: gomemcached.UPR_BUFFERACK, + } + bufferAck.Extras = make([]byte, 4) + binary.BigEndian.PutUint32(bufferAck.Extras[:4], uint32(sendSize)) + feed.writeToTransmitCh(bufferAck) + feed.stats.TotalBufferAckSent++ +} + +func (feed *UprFeed) GetUprStats() *UprStats { + 
return &feed.stats +} + +func composeOpaque(vbno, opaqueMSB uint16) uint32 { + return (uint32(opaqueMSB) << 16) | uint32(vbno) +} + +func getUprOpenCtrlOpaque() uint32 { + return atomic.AddUint32(&opaqueOpenCtrlWell, 1) +} + +func appOpaque(opq32 uint32) uint16 { + return uint16((opq32 & 0xFFFF0000) >> 16) +} + +func vbOpaque(opq32 uint32) uint16 { + return uint16(opq32 & 0xFFFF) +} + +// Close this UprFeed. +func (feed *UprFeed) Close() { + feed.muClosed.Lock() + defer feed.muClosed.Unlock() + if !feed.closed { + close(feed.closer) + feed.closed = true + feed.negotiator.initialize() + } +} + +// check if the UprFeed has been closed +func (feed *UprFeed) Closed() bool { + feed.muClosed.RLock() + defer feed.muClosed.RUnlock() + return feed.closed +} diff --git a/vendor/github.com/couchbase/gomemcached/mc_constants.go b/vendor/github.com/couchbase/gomemcached/mc_constants.go new file mode 100644 index 0000000000..1d5027d16c --- /dev/null +++ b/vendor/github.com/couchbase/gomemcached/mc_constants.go @@ -0,0 +1,335 @@ +// Package gomemcached is binary protocol packet formats and constants. +package gomemcached + +import ( + "fmt" +) + +const ( + REQ_MAGIC = 0x80 + RES_MAGIC = 0x81 +) + +// CommandCode for memcached packets. 
type CommandCode uint8

// Opcodes of the memcached binary protocol, grouped by family. Values are
// the on-the-wire opcode bytes.
const (
	// Basic key/value commands.
	GET        CommandCode = 0x00
	SET        CommandCode = 0x01
	ADD        CommandCode = 0x02
	REPLACE    CommandCode = 0x03
	DELETE     CommandCode = 0x04
	INCREMENT  CommandCode = 0x05
	DECREMENT  CommandCode = 0x06
	QUIT       CommandCode = 0x07
	FLUSH      CommandCode = 0x08
	GETQ       CommandCode = 0x09
	NOOP       CommandCode = 0x0a
	VERSION    CommandCode = 0x0b
	GETK       CommandCode = 0x0c
	GETKQ      CommandCode = 0x0d
	APPEND     CommandCode = 0x0e
	PREPEND    CommandCode = 0x0f
	STAT       CommandCode = 0x10
	SETQ       CommandCode = 0x11
	ADDQ       CommandCode = 0x12
	REPLACEQ   CommandCode = 0x13
	DELETEQ    CommandCode = 0x14
	INCREMENTQ CommandCode = 0x15
	DECREMENTQ CommandCode = 0x16
	QUITQ      CommandCode = 0x17
	FLUSHQ     CommandCode = 0x18
	APPENDQ    CommandCode = 0x19
	PREPENDQ   CommandCode = 0x1a
	GAT        CommandCode = 0x1d
	HELLO      CommandCode = 0x1f

	// SASL authentication.
	SASL_LIST_MECHS CommandCode = 0x20
	SASL_AUTH       CommandCode = 0x21
	SASL_STEP       CommandCode = 0x22

	AUDIT CommandCode = 0x27

	// Range commands.
	RGET      CommandCode = 0x30
	RSET      CommandCode = 0x31
	RSETQ     CommandCode = 0x32
	RAPPEND   CommandCode = 0x33
	RAPPENDQ  CommandCode = 0x34
	RPREPEND  CommandCode = 0x35
	RPREPENDQ CommandCode = 0x36
	RDELETE   CommandCode = 0x37
	RDELETEQ  CommandCode = 0x38
	RINCR     CommandCode = 0x39
	RINCRQ    CommandCode = 0x3a
	RDECR     CommandCode = 0x3b
	RDECRQ    CommandCode = 0x3c

	SET_VBUCKET CommandCode = 0x3d

	// TAP feed.
	TAP_CONNECT          CommandCode = 0x40 // Client-sent request to initiate Tap feed
	TAP_MUTATION         CommandCode = 0x41 // Notification of a SET/ADD/REPLACE/etc. on the server
	TAP_DELETE           CommandCode = 0x42 // Notification of a DELETE on the server
	TAP_FLUSH            CommandCode = 0x43 // Replicates a flush_all command
	TAP_OPAQUE           CommandCode = 0x44 // Opaque control data from the engine
	TAP_VBUCKET_SET      CommandCode = 0x45 // Sets state of vbucket in receiver (used in takeover)
	TAP_CHECKPOINT_START CommandCode = 0x46 // Notifies start of new checkpoint
	TAP_CHECKPOINT_END   CommandCode = 0x47 // Notifies end of checkpoint

	// UPR (DCP) feed.
	UPR_OPEN        CommandCode = 0x50 // Open a UPR connection with a name
	UPR_ADDSTREAM   CommandCode = 0x51 // Sent by ebucketMigrator to UPR Consumer
	UPR_CLOSESTREAM CommandCode = 0x52 // Sent by eBucketMigrator to UPR Consumer
	UPR_STREAMREQ   CommandCode = 0x53 // Stream request from consumer to producer
	UPR_FAILOVERLOG CommandCode = 0x54 // Request failover logs
	UPR_STREAMEND   CommandCode = 0x55 // Sent by producer when it has no more messages to stream
	UPR_SNAPSHOT    CommandCode = 0x56 // Start of a new snapshot
	UPR_MUTATION    CommandCode = 0x57 // Key mutation
	UPR_DELETION    CommandCode = 0x58 // Key deletion
	UPR_EXPIRATION  CommandCode = 0x59 // Key expiration
	UPR_FLUSH       CommandCode = 0x5a // Delete all the data for a vbucket
	UPR_NOOP        CommandCode = 0x5c // UPR NOOP
	UPR_BUFFERACK   CommandCode = 0x5d // UPR Buffer Acknowledgement
	UPR_CONTROL     CommandCode = 0x5e // Set flow control params

	SELECT_BUCKET CommandCode = 0x89 // Select bucket

	OBSERVE_SEQNO CommandCode = 0x91 // Sequence Number based Observe
	OBSERVE       CommandCode = 0x92

	GET_META            CommandCode = 0xA0 // Get meta. returns with expiry, flags, cas etc
	SUBDOC_GET          CommandCode = 0xc5 // Get subdoc. Returns with xattrs
	SUBDOC_MULTI_LOOKUP CommandCode = 0xd0 // Multi lookup. Doc xattrs and meta.
)

// BufferedCommandCodeMap lists the command codes that count toward the DCP
// control buffer: a DCP client that receives a message carrying one of them
// has to acknowledge it.
var BufferedCommandCodeMap = map[CommandCode]bool{
	SET_VBUCKET:    true,
	UPR_STREAMEND:  true,
	UPR_SNAPSHOT:   true,
	UPR_MUTATION:   true,
	UPR_DELETION:   true,
	UPR_EXPIRATION: true,
}

// Status field for memcached response.
type Status uint16

// Response status codes; protocol_binary.h is the source of truth.
const (
	SUCCESS         Status = 0x00
	KEY_ENOENT      Status = 0x01
	KEY_EEXISTS     Status = 0x02
	E2BIG           Status = 0x03
	EINVAL          Status = 0x04
	NOT_STORED      Status = 0x05
	DELTA_BADVAL    Status = 0x06
	NOT_MY_VBUCKET  Status = 0x07
	NO_BUCKET       Status = 0x08
	LOCKED          Status = 0x09
	AUTH_STALE      Status = 0x1f
	AUTH_ERROR      Status = 0x20
	AUTH_CONTINUE   Status = 0x21
	ERANGE          Status = 0x22
	ROLLBACK        Status = 0x23
	EACCESS         Status = 0x24
	NOT_INITIALIZED Status = 0x25
	UNKNOWN_COMMAND Status = 0x81
	ENOMEM          Status = 0x82
	NOT_SUPPORTED   Status = 0x83
	EINTERNAL       Status = 0x84
	EBUSY           Status = 0x85
	TMPFAIL         Status = 0x86

	// SUBDOC
	SUBDOC_PATH_NOT_FOUND             Status = 0xc0
	SUBDOC_BAD_MULTI                  Status = 0xcc
	SUBDOC_MULTI_PATH_FAILURE_DELETED Status = 0xd3
)

// Tags used for log redaction.
const (
	UdTagBegin = "<ud>"
	UdTagEnd   = "</ud>"
)

// isFatal is the set of statuses flagged as fatal.
var isFatal = map[Status]bool{
	DELTA_BADVAL:  true,
	NO_BUCKET:     true,
	AUTH_STALE:    true,
	AUTH_ERROR:    true,
	ERANGE:        true,
	ROLLBACK:      true,
	EACCESS:       true,
	ENOMEM:        true,
	NOT_SUPPORTED: true,
}

// Flag bits. These stay variables (not constants), as in the original
// declarations.
var (
	// DCP_PRODUCER is the producer/consumer bit in dcp flags.
	DCP_PRODUCER uint32 = 0x01

	// DCP_OPEN_INCLUDE_XATTRS is the include XATTRS bit in dcp flags.
	DCP_OPEN_INCLUDE_XATTRS uint32 = 0x04

	// DCP_OPEN_INCLUDE_DELETE_TIMES is the include deletion time bit in dcp flags.
	DCP_OPEN_INCLUDE_DELETE_TIMES uint32 = 0x20

	// SUBDOC_FLAG_XATTR is the datatype to include XATTRS in SUBDOC GET.
	SUBDOC_FLAG_XATTR uint8 = 0x04
)

// MCItem is an internal representation of an item.
+type MCItem struct { + Cas uint64 + Flags, Expiration uint32 + Data []byte +} + +// Number of bytes in a binary protocol header. +const HDR_LEN = 24 + +// Mapping of CommandCode -> name of command (not exhaustive) +var CommandNames map[CommandCode]string + +// StatusNames human readable names for memcached response. +var StatusNames map[Status]string + +func init() { + CommandNames = make(map[CommandCode]string) + CommandNames[GET] = "GET" + CommandNames[SET] = "SET" + CommandNames[ADD] = "ADD" + CommandNames[REPLACE] = "REPLACE" + CommandNames[DELETE] = "DELETE" + CommandNames[INCREMENT] = "INCREMENT" + CommandNames[DECREMENT] = "DECREMENT" + CommandNames[QUIT] = "QUIT" + CommandNames[FLUSH] = "FLUSH" + CommandNames[GETQ] = "GETQ" + CommandNames[NOOP] = "NOOP" + CommandNames[VERSION] = "VERSION" + CommandNames[GETK] = "GETK" + CommandNames[GETKQ] = "GETKQ" + CommandNames[APPEND] = "APPEND" + CommandNames[PREPEND] = "PREPEND" + CommandNames[STAT] = "STAT" + CommandNames[SETQ] = "SETQ" + CommandNames[ADDQ] = "ADDQ" + CommandNames[REPLACEQ] = "REPLACEQ" + CommandNames[DELETEQ] = "DELETEQ" + CommandNames[INCREMENTQ] = "INCREMENTQ" + CommandNames[DECREMENTQ] = "DECREMENTQ" + CommandNames[QUITQ] = "QUITQ" + CommandNames[FLUSHQ] = "FLUSHQ" + CommandNames[APPENDQ] = "APPENDQ" + CommandNames[PREPENDQ] = "PREPENDQ" + CommandNames[RGET] = "RGET" + CommandNames[RSET] = "RSET" + CommandNames[RSETQ] = "RSETQ" + CommandNames[RAPPEND] = "RAPPEND" + CommandNames[RAPPENDQ] = "RAPPENDQ" + CommandNames[RPREPEND] = "RPREPEND" + CommandNames[RPREPENDQ] = "RPREPENDQ" + CommandNames[RDELETE] = "RDELETE" + CommandNames[RDELETEQ] = "RDELETEQ" + CommandNames[RINCR] = "RINCR" + CommandNames[RINCRQ] = "RINCRQ" + CommandNames[RDECR] = "RDECR" + CommandNames[RDECRQ] = "RDECRQ" + + CommandNames[SASL_LIST_MECHS] = "SASL_LIST_MECHS" + CommandNames[SASL_AUTH] = "SASL_AUTH" + CommandNames[SASL_STEP] = "SASL_STEP" + + CommandNames[TAP_CONNECT] = "TAP_CONNECT" + CommandNames[TAP_MUTATION] = 
"TAP_MUTATION" + CommandNames[TAP_DELETE] = "TAP_DELETE" + CommandNames[TAP_FLUSH] = "TAP_FLUSH" + CommandNames[TAP_OPAQUE] = "TAP_OPAQUE" + CommandNames[TAP_VBUCKET_SET] = "TAP_VBUCKET_SET" + CommandNames[TAP_CHECKPOINT_START] = "TAP_CHECKPOINT_START" + CommandNames[TAP_CHECKPOINT_END] = "TAP_CHECKPOINT_END" + + CommandNames[UPR_OPEN] = "UPR_OPEN" + CommandNames[UPR_ADDSTREAM] = "UPR_ADDSTREAM" + CommandNames[UPR_CLOSESTREAM] = "UPR_CLOSESTREAM" + CommandNames[UPR_FAILOVERLOG] = "UPR_FAILOVERLOG" + CommandNames[UPR_STREAMREQ] = "UPR_STREAMREQ" + CommandNames[UPR_STREAMEND] = "UPR_STREAMEND" + CommandNames[UPR_SNAPSHOT] = "UPR_SNAPSHOT" + CommandNames[UPR_MUTATION] = "UPR_MUTATION" + CommandNames[UPR_DELETION] = "UPR_DELETION" + CommandNames[UPR_EXPIRATION] = "UPR_EXPIRATION" + CommandNames[UPR_FLUSH] = "UPR_FLUSH" + CommandNames[UPR_NOOP] = "UPR_NOOP" + CommandNames[UPR_BUFFERACK] = "UPR_BUFFERACK" + CommandNames[UPR_CONTROL] = "UPR_CONTROL" + CommandNames[SUBDOC_GET] = "SUBDOC_GET" + CommandNames[SUBDOC_MULTI_LOOKUP] = "SUBDOC_MULTI_LOOKUP" + + StatusNames = make(map[Status]string) + StatusNames[SUCCESS] = "SUCCESS" + StatusNames[KEY_ENOENT] = "KEY_ENOENT" + StatusNames[KEY_EEXISTS] = "KEY_EEXISTS" + StatusNames[E2BIG] = "E2BIG" + StatusNames[EINVAL] = "EINVAL" + StatusNames[NOT_STORED] = "NOT_STORED" + StatusNames[DELTA_BADVAL] = "DELTA_BADVAL" + StatusNames[NOT_MY_VBUCKET] = "NOT_MY_VBUCKET" + StatusNames[NO_BUCKET] = "NO_BUCKET" + StatusNames[AUTH_STALE] = "AUTH_STALE" + StatusNames[AUTH_ERROR] = "AUTH_ERROR" + StatusNames[AUTH_CONTINUE] = "AUTH_CONTINUE" + StatusNames[ERANGE] = "ERANGE" + StatusNames[ROLLBACK] = "ROLLBACK" + StatusNames[EACCESS] = "EACCESS" + StatusNames[NOT_INITIALIZED] = "NOT_INITIALIZED" + StatusNames[UNKNOWN_COMMAND] = "UNKNOWN_COMMAND" + StatusNames[ENOMEM] = "ENOMEM" + StatusNames[NOT_SUPPORTED] = "NOT_SUPPORTED" + StatusNames[EINTERNAL] = "EINTERNAL" + StatusNames[EBUSY] = "EBUSY" + StatusNames[TMPFAIL] = "TMPFAIL" + 
StatusNames[SUBDOC_PATH_NOT_FOUND] = "SUBDOC_PATH_NOT_FOUND" + StatusNames[SUBDOC_BAD_MULTI] = "SUBDOC_BAD_MULTI" + +} + +// String an op code. +func (o CommandCode) String() (rv string) { + rv = CommandNames[o] + if rv == "" { + rv = fmt.Sprintf("0x%02x", int(o)) + } + return rv +} + +// String an op code. +func (s Status) String() (rv string) { + rv = StatusNames[s] + if rv == "" { + rv = fmt.Sprintf("0x%02x", int(s)) + } + return rv +} + +// IsQuiet will return true if a command is a "quiet" command. +func (o CommandCode) IsQuiet() bool { + switch o { + case GETQ, + GETKQ, + SETQ, + ADDQ, + REPLACEQ, + DELETEQ, + INCREMENTQ, + DECREMENTQ, + QUITQ, + FLUSHQ, + APPENDQ, + PREPENDQ, + RSETQ, + RAPPENDQ, + RPREPENDQ, + RDELETEQ, + RINCRQ, + RDECRQ: + return true + } + return false +} diff --git a/vendor/github.com/couchbase/gomemcached/mc_req.go b/vendor/github.com/couchbase/gomemcached/mc_req.go new file mode 100644 index 0000000000..3ff67ab9a7 --- /dev/null +++ b/vendor/github.com/couchbase/gomemcached/mc_req.go @@ -0,0 +1,197 @@ +package gomemcached + +import ( + "encoding/binary" + "fmt" + "io" +) + +// The maximum reasonable body length to expect. +// Anything larger than this will result in an error. +// The current limit, 20MB, is the size limit supported by ep-engine. +var MaxBodyLen = int(20 * 1024 * 1024) + +// MCRequest is memcached Request +type MCRequest struct { + // The command being issued + Opcode CommandCode + // The CAS (if applicable, or 0) + Cas uint64 + // An opaque value to be returned with this request + Opaque uint32 + // The vbucket to which this command belongs + VBucket uint16 + // Command extras, key, and body + Extras, Key, Body, ExtMeta []byte + // Datatype identifier + DataType uint8 +} + +// Size gives the number of bytes this request requires. 
+func (req *MCRequest) Size() int { + return HDR_LEN + len(req.Extras) + len(req.Key) + len(req.Body) + len(req.ExtMeta) +} + +// A debugging string representation of this request +func (req MCRequest) String() string { + return fmt.Sprintf("{MCRequest opcode=%s, bodylen=%d, key='%s'}", + req.Opcode, len(req.Body), req.Key) +} + +func (req *MCRequest) fillHeaderBytes(data []byte) int { + + pos := 0 + data[pos] = REQ_MAGIC + pos++ + data[pos] = byte(req.Opcode) + pos++ + binary.BigEndian.PutUint16(data[pos:pos+2], + uint16(len(req.Key))) + pos += 2 + + // 4 + data[pos] = byte(len(req.Extras)) + pos++ + // Data type + if req.DataType != 0 { + data[pos] = byte(req.DataType) + } + pos++ + binary.BigEndian.PutUint16(data[pos:pos+2], req.VBucket) + pos += 2 + + // 8 + binary.BigEndian.PutUint32(data[pos:pos+4], + uint32(len(req.Body)+len(req.Key)+len(req.Extras)+len(req.ExtMeta))) + pos += 4 + + // 12 + binary.BigEndian.PutUint32(data[pos:pos+4], req.Opaque) + pos += 4 + + // 16 + if req.Cas != 0 { + binary.BigEndian.PutUint64(data[pos:pos+8], req.Cas) + } + pos += 8 + + if len(req.Extras) > 0 { + copy(data[pos:pos+len(req.Extras)], req.Extras) + pos += len(req.Extras) + } + + if len(req.Key) > 0 { + copy(data[pos:pos+len(req.Key)], req.Key) + pos += len(req.Key) + } + + return pos +} + +// HeaderBytes will return the wire representation of the request header +// (with the extras and key). +func (req *MCRequest) HeaderBytes() []byte { + data := make([]byte, HDR_LEN+len(req.Extras)+len(req.Key)) + + req.fillHeaderBytes(data) + + return data +} + +// Bytes will return the wire representation of this request. 
+func (req *MCRequest) Bytes() []byte { + data := make([]byte, req.Size()) + + pos := req.fillHeaderBytes(data) + + if len(req.Body) > 0 { + copy(data[pos:pos+len(req.Body)], req.Body) + } + + if len(req.ExtMeta) > 0 { + copy(data[pos+len(req.Body):pos+len(req.Body)+len(req.ExtMeta)], req.ExtMeta) + } + + return data +} + +// Transmit will send this request message across a writer. +func (req *MCRequest) Transmit(w io.Writer) (n int, err error) { + if len(req.Body) < 128 { + n, err = w.Write(req.Bytes()) + } else { + n, err = w.Write(req.HeaderBytes()) + if err == nil { + m := 0 + m, err = w.Write(req.Body) + n += m + } + } + return +} + +// Receive will fill this MCRequest with the data from a reader. +func (req *MCRequest) Receive(r io.Reader, hdrBytes []byte) (int, error) { + if len(hdrBytes) < HDR_LEN { + hdrBytes = []byte{ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0} + } + n, err := io.ReadFull(r, hdrBytes) + if err != nil { + return n, err + } + + if hdrBytes[0] != RES_MAGIC && hdrBytes[0] != REQ_MAGIC { + return n, fmt.Errorf("bad magic: 0x%02x", hdrBytes[0]) + } + + klen := int(binary.BigEndian.Uint16(hdrBytes[2:])) + elen := int(hdrBytes[4]) + // Data type at 5 + req.DataType = uint8(hdrBytes[5]) + + req.Opcode = CommandCode(hdrBytes[1]) + // Vbucket at 6:7 + req.VBucket = binary.BigEndian.Uint16(hdrBytes[6:]) + totalBodyLen := int(binary.BigEndian.Uint32(hdrBytes[8:])) + + req.Opaque = binary.BigEndian.Uint32(hdrBytes[12:]) + req.Cas = binary.BigEndian.Uint64(hdrBytes[16:]) + + if totalBodyLen > 0 { + buf := make([]byte, totalBodyLen) + m, err := io.ReadFull(r, buf) + n += m + if err == nil { + if req.Opcode >= TAP_MUTATION && + req.Opcode <= TAP_CHECKPOINT_END && + len(buf) > 1 { + // In these commands there is "engine private" + // data at the end of the extras. The first 2 + // bytes of extra data give its length. 
+ elen += int(binary.BigEndian.Uint16(buf)) + } + + req.Extras = buf[0:elen] + req.Key = buf[elen : klen+elen] + + // get the length of extended metadata + extMetaLen := 0 + if elen > 29 { + extMetaLen = int(binary.BigEndian.Uint16(req.Extras[28:30])) + } + + bodyLen := totalBodyLen - klen - elen - extMetaLen + if bodyLen > MaxBodyLen { + return n, fmt.Errorf("%d is too big (max %d)", + bodyLen, MaxBodyLen) + } + + req.Body = buf[klen+elen : klen+elen+bodyLen] + req.ExtMeta = buf[klen+elen+bodyLen:] + } + } + return n, err +} diff --git a/vendor/github.com/couchbase/gomemcached/mc_res.go b/vendor/github.com/couchbase/gomemcached/mc_res.go new file mode 100644 index 0000000000..2b4cfe1349 --- /dev/null +++ b/vendor/github.com/couchbase/gomemcached/mc_res.go @@ -0,0 +1,267 @@ +package gomemcached + +import ( + "encoding/binary" + "fmt" + "io" + "sync" +) + +// MCResponse is memcached response +type MCResponse struct { + // The command opcode of the command that sent the request + Opcode CommandCode + // The status of the response + Status Status + // The opaque sent in the request + Opaque uint32 + // The CAS identifier (if applicable) + Cas uint64 + // Extras, key, and body for this response + Extras, Key, Body []byte + // If true, this represents a fatal condition and we should hang up + Fatal bool + // Datatype identifier + DataType uint8 +} + +// A debugging string representation of this response +func (res MCResponse) String() string { + return fmt.Sprintf("{MCResponse status=%v keylen=%d, extralen=%d, bodylen=%d}", + res.Status, len(res.Key), len(res.Extras), len(res.Body)) +} + +// Response as an error. 
+func (res *MCResponse) Error() string { + return fmt.Sprintf("MCResponse status=%v, opcode=%v, opaque=%v, msg: %s", + res.Status, res.Opcode, res.Opaque, string(res.Body)) +} + +func errStatus(e error) Status { + status := Status(0xffff) + if res, ok := e.(*MCResponse); ok { + status = res.Status + } + return status +} + +// IsNotFound is true if this error represents a "not found" response. +func IsNotFound(e error) bool { + return errStatus(e) == KEY_ENOENT +} + +// IsFatal is false if this error isn't believed to be fatal to a connection. +func IsFatal(e error) bool { + if e == nil { + return false + } + _, ok := isFatal[errStatus(e)] + if ok { + return true + } + return false +} + +// Size is number of bytes this response consumes on the wire. +func (res *MCResponse) Size() int { + return HDR_LEN + len(res.Extras) + len(res.Key) + len(res.Body) +} + +func (res *MCResponse) fillHeaderBytes(data []byte) int { + pos := 0 + data[pos] = RES_MAGIC + pos++ + data[pos] = byte(res.Opcode) + pos++ + binary.BigEndian.PutUint16(data[pos:pos+2], + uint16(len(res.Key))) + pos += 2 + + // 4 + data[pos] = byte(len(res.Extras)) + pos++ + // Data type + if res.DataType != 0 { + data[pos] = byte(res.DataType) + } else { + data[pos] = 0 + } + pos++ + binary.BigEndian.PutUint16(data[pos:pos+2], uint16(res.Status)) + pos += 2 + + // 8 + binary.BigEndian.PutUint32(data[pos:pos+4], + uint32(len(res.Body)+len(res.Key)+len(res.Extras))) + pos += 4 + + // 12 + binary.BigEndian.PutUint32(data[pos:pos+4], res.Opaque) + pos += 4 + + // 16 + binary.BigEndian.PutUint64(data[pos:pos+8], res.Cas) + pos += 8 + + if len(res.Extras) > 0 { + copy(data[pos:pos+len(res.Extras)], res.Extras) + pos += len(res.Extras) + } + + if len(res.Key) > 0 { + copy(data[pos:pos+len(res.Key)], res.Key) + pos += len(res.Key) + } + + return pos +} + +// HeaderBytes will get just the header bytes for this response. 
+func (res *MCResponse) HeaderBytes() []byte { + data := make([]byte, HDR_LEN+len(res.Extras)+len(res.Key)) + + res.fillHeaderBytes(data) + + return data +} + +// Bytes will return the actual bytes transmitted for this response. +func (res *MCResponse) Bytes() []byte { + data := make([]byte, res.Size()) + + pos := res.fillHeaderBytes(data) + + copy(data[pos:pos+len(res.Body)], res.Body) + + return data +} + +// Transmit will send this response message across a writer. +func (res *MCResponse) Transmit(w io.Writer) (n int, err error) { + if len(res.Body) < 128 { + n, err = w.Write(res.Bytes()) + } else { + n, err = w.Write(res.HeaderBytes()) + if err == nil { + m := 0 + m, err = w.Write(res.Body) + m += n + } + } + return +} + +// Receive will fill this MCResponse with the data from this reader. +func (res *MCResponse) Receive(r io.Reader, hdrBytes []byte) (n int, err error) { + if len(hdrBytes) < HDR_LEN { + hdrBytes = []byte{ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0} + } + n, err = io.ReadFull(r, hdrBytes) + if err != nil { + return n, err + } + + if hdrBytes[0] != RES_MAGIC && hdrBytes[0] != REQ_MAGIC { + return n, fmt.Errorf("bad magic: 0x%02x", hdrBytes[0]) + } + + klen := int(binary.BigEndian.Uint16(hdrBytes[2:4])) + elen := int(hdrBytes[4]) + + res.Opcode = CommandCode(hdrBytes[1]) + res.DataType = uint8(hdrBytes[5]) + res.Status = Status(binary.BigEndian.Uint16(hdrBytes[6:8])) + res.Opaque = binary.BigEndian.Uint32(hdrBytes[12:16]) + res.Cas = binary.BigEndian.Uint64(hdrBytes[16:24]) + + bodyLen := int(binary.BigEndian.Uint32(hdrBytes[8:12])) - (klen + elen) + + //defer function to debug the panic seen with MB-15557 + defer func() { + if e := recover(); e != nil { + err = fmt.Errorf(`Panic in Receive. 
Response %v \n + key len %v extra len %v bodylen %v`, res, klen, elen, bodyLen) + } + }() + + buf := make([]byte, klen+elen+bodyLen) + m, err := io.ReadFull(r, buf) + if err == nil { + res.Extras = buf[0:elen] + res.Key = buf[elen : klen+elen] + res.Body = buf[klen+elen:] + } + + return n + m, err +} + +type MCResponsePool struct { + pool *sync.Pool +} + +func NewMCResponsePool() *MCResponsePool { + rv := &MCResponsePool{ + pool: &sync.Pool{ + New: func() interface{} { + return &MCResponse{} + }, + }, + } + + return rv +} + +func (this *MCResponsePool) Get() *MCResponse { + return this.pool.Get().(*MCResponse) +} + +func (this *MCResponsePool) Put(r *MCResponse) { + if r == nil { + return + } + + r.Extras = nil + r.Key = nil + r.Body = nil + r.Fatal = false + + this.pool.Put(r) +} + +type StringMCResponsePool struct { + pool *sync.Pool + size int +} + +func NewStringMCResponsePool(size int) *StringMCResponsePool { + rv := &StringMCResponsePool{ + pool: &sync.Pool{ + New: func() interface{} { + return make(map[string]*MCResponse, size) + }, + }, + size: size, + } + + return rv +} + +func (this *StringMCResponsePool) Get() map[string]*MCResponse { + return this.pool.Get().(map[string]*MCResponse) +} + +func (this *StringMCResponsePool) Put(m map[string]*MCResponse) { + if m == nil || len(m) > 2*this.size { + return + } + + for k := range m { + m[k] = nil + delete(m, k) + } + + this.pool.Put(m) +} diff --git a/vendor/github.com/couchbase/gomemcached/tap.go b/vendor/github.com/couchbase/gomemcached/tap.go new file mode 100644 index 0000000000..e48623281b --- /dev/null +++ b/vendor/github.com/couchbase/gomemcached/tap.go @@ -0,0 +1,168 @@ +package gomemcached + +import ( + "bytes" + "encoding/binary" + "fmt" + "io" + "io/ioutil" + "strings" +) + +type TapConnectFlag uint32 + +// Tap connect option flags +const ( + BACKFILL = TapConnectFlag(0x01) + DUMP = TapConnectFlag(0x02) + LIST_VBUCKETS = TapConnectFlag(0x04) + TAKEOVER_VBUCKETS = TapConnectFlag(0x08) + SUPPORT_ACK 
= TapConnectFlag(0x10) + REQUEST_KEYS_ONLY = TapConnectFlag(0x20) + CHECKPOINT = TapConnectFlag(0x40) + REGISTERED_CLIENT = TapConnectFlag(0x80) + FIX_FLAG_BYTEORDER = TapConnectFlag(0x100) +) + +// Tap opaque event subtypes +const ( + TAP_OPAQUE_ENABLE_AUTO_NACK = 0 + TAP_OPAQUE_INITIAL_VBUCKET_STREAM = 1 + TAP_OPAQUE_ENABLE_CHECKPOINT_SYNC = 2 + TAP_OPAQUE_CLOSE_TAP_STREAM = 7 + TAP_OPAQUE_CLOSE_BACKFILL = 8 +) + +// Tap item flags +const ( + TAP_ACK = 1 + TAP_NO_VALUE = 2 + TAP_FLAG_NETWORK_BYTE_ORDER = 4 +) + +// TapConnectFlagNames for TapConnectFlag +var TapConnectFlagNames = map[TapConnectFlag]string{ + BACKFILL: "BACKFILL", + DUMP: "DUMP", + LIST_VBUCKETS: "LIST_VBUCKETS", + TAKEOVER_VBUCKETS: "TAKEOVER_VBUCKETS", + SUPPORT_ACK: "SUPPORT_ACK", + REQUEST_KEYS_ONLY: "REQUEST_KEYS_ONLY", + CHECKPOINT: "CHECKPOINT", + REGISTERED_CLIENT: "REGISTERED_CLIENT", + FIX_FLAG_BYTEORDER: "FIX_FLAG_BYTEORDER", +} + +// TapItemParser is a function to parse a single tap extra. +type TapItemParser func(io.Reader) (interface{}, error) + +// TapParseUint64 is a function to parse a single tap uint64. +func TapParseUint64(r io.Reader) (interface{}, error) { + var rv uint64 + err := binary.Read(r, binary.BigEndian, &rv) + return rv, err +} + +// TapParseUint16 is a function to parse a single tap uint16. +func TapParseUint16(r io.Reader) (interface{}, error) { + var rv uint16 + err := binary.Read(r, binary.BigEndian, &rv) + return rv, err +} + +// TapParseBool is a function to parse a single tap boolean. +func TapParseBool(r io.Reader) (interface{}, error) { + return true, nil +} + +// TapParseVBList parses a list of vBucket numbers as []uint16. 
+func TapParseVBList(r io.Reader) (interface{}, error) { + num, err := TapParseUint16(r) + if err != nil { + return nil, err + } + n := int(num.(uint16)) + + rv := make([]uint16, n) + for i := 0; i < n; i++ { + x, err := TapParseUint16(r) + if err != nil { + return nil, err + } + rv[i] = x.(uint16) + } + + return rv, err +} + +// TapFlagParsers parser functions for TAP fields. +var TapFlagParsers = map[TapConnectFlag]TapItemParser{ + BACKFILL: TapParseUint64, + LIST_VBUCKETS: TapParseVBList, +} + +// SplitFlags will split the ORed flags into the individual bit flags. +func (f TapConnectFlag) SplitFlags() []TapConnectFlag { + rv := []TapConnectFlag{} + for i := uint32(1); f != 0; i = i << 1 { + if uint32(f)&i == i { + rv = append(rv, TapConnectFlag(i)) + } + f = TapConnectFlag(uint32(f) & (^i)) + } + return rv +} + +func (f TapConnectFlag) String() string { + parts := []string{} + for _, x := range f.SplitFlags() { + p := TapConnectFlagNames[x] + if p == "" { + p = fmt.Sprintf("0x%x", int(x)) + } + parts = append(parts, p) + } + return strings.Join(parts, "|") +} + +type TapConnect struct { + Flags map[TapConnectFlag]interface{} + RemainingBody []byte + Name string +} + +// ParseTapCommands parse the tap request into the interesting bits we may +// need to do something with. 
+func (req *MCRequest) ParseTapCommands() (TapConnect, error) { + rv := TapConnect{ + Flags: map[TapConnectFlag]interface{}{}, + Name: string(req.Key), + } + + if len(req.Extras) < 4 { + return rv, fmt.Errorf("not enough extra bytes: %x", req.Extras) + } + + flags := TapConnectFlag(binary.BigEndian.Uint32(req.Extras)) + + r := bytes.NewReader(req.Body) + + for _, f := range flags.SplitFlags() { + fun := TapFlagParsers[f] + if fun == nil { + fun = TapParseBool + } + + val, err := fun(r) + if err != nil { + return rv, err + } + + rv.Flags[f] = val + } + + var err error + rv.RemainingBody, err = ioutil.ReadAll(r) + + return rv, err +} diff --git a/vendor/github.com/couchbase/goutils/LICENSE.md b/vendor/github.com/couchbase/goutils/LICENSE.md new file mode 100644 index 0000000000..a572e246e6 --- /dev/null +++ b/vendor/github.com/couchbase/goutils/LICENSE.md @@ -0,0 +1,47 @@ +COUCHBASE INC. COMMUNITY EDITION LICENSE AGREEMENT + +IMPORTANT-READ CAREFULLY: BY CLICKING THE "I ACCEPT" BOX OR INSTALLING, +DOWNLOADING OR OTHERWISE USING THIS SOFTWARE AND ANY ASSOCIATED +DOCUMENTATION, YOU, ON BEHALF OF YOURSELF OR AS AN AUTHORIZED +REPRESENTATIVE ON BEHALF OF AN ENTITY ("LICENSEE") AGREE TO ALL THE +TERMS OF THIS COMMUNITY EDITION LICENSE AGREEMENT (THE "AGREEMENT") +REGARDING YOUR USE OF THE SOFTWARE. YOU REPRESENT AND WARRANT THAT YOU +HAVE FULL LEGAL AUTHORITY TO BIND THE LICENSEE TO THIS AGREEMENT. IF YOU +DO NOT AGREE WITH ALL OF THESE TERMS, DO NOT SELECT THE "I ACCEPT" BOX +AND DO NOT INSTALL, DOWNLOAD OR OTHERWISE USE THE SOFTWARE. THE +EFFECTIVE DATE OF THIS AGREEMENT IS THE DATE ON WHICH YOU CLICK "I +ACCEPT" OR OTHERWISE INSTALL, DOWNLOAD OR USE THE SOFTWARE. + +1. License Grant. Couchbase Inc. 
hereby grants Licensee, free of charge, +the non-exclusive right to use, copy, merge, publish, distribute, +sublicense, and/or sell copies of the Software, and to permit persons to +whom the Software is furnished to do so, subject to Licensee including +the following copyright notice in all copies or substantial portions of +the Software: + +Couchbase (r) http://www.Couchbase.com Copyright 2016 Couchbase, Inc. + +As used in this Agreement, "Software" means the object code version of +the applicable elastic data management server software provided by +Couchbase Inc. + +2. Restrictions. Licensee will not reverse engineer, disassemble, or +decompile the Software (except to the extent such restrictions are +prohibited by law). + +3. Support. Couchbase, Inc. will provide Licensee with access to, and +use of, the Couchbase, Inc. support forum available at the following +URL: http://www.couchbase.org/forums/. Couchbase, Inc. may, at its +discretion, modify, suspend or terminate support at any time upon notice +to Licensee. + +4. Warranty Disclaimer and Limitation of Liability. THE SOFTWARE IS +PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +COUCHBASE INC. OR THE AUTHORS OR COPYRIGHT HOLDERS IN THE SOFTWARE BE +LIABLE FOR ANY CLAIM, DAMAGES (IINCLUDING, WITHOUT LIMITATION, DIRECT, +INDIRECT OR CONSEQUENTIAL DAMAGES) OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/vendor/github.com/couchbase/goutils/logging/logger.go b/vendor/github.com/couchbase/goutils/logging/logger.go new file mode 100644 index 0000000000..b9948f9b2e --- /dev/null +++ b/vendor/github.com/couchbase/goutils/logging/logger.go @@ -0,0 +1,481 @@ +// Copyright (c) 2016 Couchbase, Inc. 
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+// except in compliance with the License. You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software distributed under the
+// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+// either express or implied. See the License for the specific language governing permissions
+// and limitations under the License.
+
+package logging
+
+import (
+	"os"
+	"runtime"
+	"strings"
+	"sync"
+)
+
+// Level is a logging level. Values are numbered in declaration order, from
+// NONE (0) up to DEBUG (8).
+type Level int
+
+const (
+	NONE    = Level(iota) // Disable all logging
+	FATAL                 // System is in severe error state and has to abort
+	SEVERE                // System is in severe error state and cannot recover reliably
+	ERROR                 // System is in error state but can recover and continue reliably
+	WARN                  // System approaching error state, or is in a correct but undesirable state
+	INFO                  // System-level events and status, in correct states
+	REQUEST               // Request-level events, with request-specific rlevel
+	TRACE                 // Trace detailed system execution, e.g. function entry / exit
+	DEBUG                 // Debug
+)
+
+// LogEntryFormatter selects the output format of log entries.
+type LogEntryFormatter int
+
+const (
+	TEXTFORMATTER = LogEntryFormatter(iota)
+	JSONFORMATTER
+	KVFORMATTER
+)
+
+// String returns the upper-case name of the level.
+// NOTE(review): this indexes _LEVEL_NAMES directly, so a Level outside
+// NONE..DEBUG panics with an index-out-of-range error.
+func (level Level) String() string {
+	return _LEVEL_NAMES[level]
+}
+
+// _LEVEL_NAMES is indexed by Level and holds each level's display name.
+var _LEVEL_NAMES = []string{
+	DEBUG:   "DEBUG",
+	TRACE:   "TRACE",
+	REQUEST: "REQUEST",
+	INFO:    "INFO",
+	WARN:    "WARN",
+	ERROR:   "ERROR",
+	SEVERE:  "SEVERE",
+	FATAL:   "FATAL",
+	NONE:    "NONE",
+}
+
+// _LEVEL_MAP maps lower-case level names to their Level; used by ParseLevel.
+var _LEVEL_MAP = map[string]Level{
+	"debug":   DEBUG,
+	"trace":   TRACE,
+	"request": REQUEST,
+	"info":    INFO,
+	"warn":    WARN,
+	"error":   ERROR,
+	"severe":  SEVERE,
+	"fatal":   FATAL,
+	"none":    NONE,
+}
+
+// ParseLevel looks up a level by name, ignoring case. ok is false (and level
+// is NONE, the zero value) when the name is not a known level.
+func ParseLevel(name string) (level Level, ok bool) {
+	level, ok = _LEVEL_MAP[strings.ToLower(name)]
+	return
+}
+
+/*
+
+Pair supports logging of key-value pairs.
Keys beginning with _ are
+reserved for the logger, e.g. _time, _level, _msg, and _rlevel. The
+Pair APIs are designed to avoid heap allocation and garbage
+collection.
+
+*/
+type Pairs []Pair
+type Pair struct {
+	Name  string      // key under which Value is logged
+	Value interface{} // value logged for Name
+}
+
+/*
+
+Map allows key-value pairs to be specified using map literals or data
+structures. For example:
+
+Errorm(msg, Map{...})
+
+Map incurs heap allocation and garbage collection, so the Pair APIs
+should be preferred.
+
+*/
+type Map map[string]interface{}
+
+// Logger provides a common interface for logging libraries
+type Logger interface {
+	/*
+		These APIs write all the given pairs in addition to standard logger keys.
+	*/
+	Logp(level Level, msg string, kv ...Pair)
+
+	Debugp(msg string, kv ...Pair)
+
+	Tracep(msg string, kv ...Pair)
+
+	// Requestp takes the request-specific level as rlevel.
+	Requestp(rlevel Level, msg string, kv ...Pair)
+
+	Infop(msg string, kv ...Pair)
+
+	Warnp(msg string, kv ...Pair)
+
+	Errorp(msg string, kv ...Pair)
+
+	Severep(msg string, kv ...Pair)
+
+	Fatalp(msg string, kv ...Pair)
+
+	/*
+		These APIs write the fields in the given kv Map in addition to standard logger keys.
+	*/
+	Logm(level Level, msg string, kv Map)
+
+	Debugm(msg string, kv Map)
+
+	Tracem(msg string, kv Map)
+
+	// Requestm takes the request-specific level as rlevel.
+	Requestm(rlevel Level, msg string, kv Map)
+
+	Infom(msg string, kv Map)
+
+	Warnm(msg string, kv Map)
+
+	Errorm(msg string, kv Map)
+
+	Severem(msg string, kv Map)
+
+	Fatalm(msg string, kv Map)
+
+	/*
+
+		These APIs only write _msg, _time, _level, and other logger keys. If
+		the msg contains other fields, use the Pair or Map APIs instead.
+
+	*/
+	Logf(level Level, fmt string, args ...interface{})
+
+	Debugf(fmt string, args ...interface{})
+
+	Tracef(fmt string, args ...interface{})
+
+	// Requestf takes the request-specific level as rlevel.
+	Requestf(rlevel Level, fmt string, args ...interface{})
+
+	Infof(fmt string, args ...interface{})
+
+	Warnf(fmt string, args ...interface{})
+
+	Errorf(fmt string, args ...interface{})
+
+	Severef(fmt string, args ...interface{})
+
+	Fatalf(fmt string, args ...interface{})
+
+	/*
+		These APIs control the logging level
+	*/
+
+	SetLevel(Level) // Set the logging level
+
+	Level() Level // Get the current logging level
+}
+
+// logger is the package-wide Logger that the package-level functions
+// delegate to; curLevel caches its level for skipLogging.
+var logger Logger = nil
+var curLevel Level = DEBUG // initially set to never skip
+
+// loggerMutex is held by the package-level functions while they call into logger.
+var loggerMutex sync.RWMutex
+
+// All the methods below first acquire the mutex (mostly in exclusive mode)
+// and only then check if logging at the current level is enabled.
+// This introduces a fair bottleneck for those log entries that should be
+// skipped (the majority, at INFO or below levels)
+// We try to predict here if we should lock the mutex at all by caching
+// the current log level: while dynamically changing logger, there might
+// be the odd entry skipped as the new level is cached.
+// Since we seem to never change the logger, this is not an issue.
+//
+// NOTE(review): logger and curLevel are read here without holding
+// loggerMutex, so this races with SetLogger; the callers below do not
+// re-check logger for nil once they hold the lock.
+func skipLogging(level Level) bool {
+	if logger == nil {
+		return true
+	}
+	return level > curLevel
+}
+
+// SetLogger installs newLogger as the package logger and refreshes the
+// cached level: NONE when newLogger is nil, otherwise newLogger.Level().
+func SetLogger(newLogger Logger) {
+	loggerMutex.Lock()
+	defer loggerMutex.Unlock()
+	logger = newLogger
+	if logger == nil {
+		curLevel = NONE
+	} else {
+		curLevel = newLogger.Level()
+	}
+}
+
+// Logp forwards msg and the given pairs to the package logger at the given
+// level, unless skipLogging reports that level as disabled.
+func Logp(level Level, msg string, kv ...Pair) {
+	if skipLogging(level) {
+		return
+	}
+	loggerMutex.Lock()
+	defer loggerMutex.Unlock()
+	logger.Logp(level, msg, kv...)
+}
+
+// Debugp forwards msg and the given pairs to the package logger at DEBUG level.
+func Debugp(msg string, kv ...Pair) {
+	if skipLogging(DEBUG) {
+		return
+	}
+	loggerMutex.Lock()
+	defer loggerMutex.Unlock()
+	logger.Debugp(msg, kv...)
+}
+
+// Tracep forwards msg and the given pairs to the package logger at TRACE level.
+func Tracep(msg string, kv ...Pair) {
+	if skipLogging(TRACE) {
+		return
+	}
+	loggerMutex.Lock()
+	defer loggerMutex.Unlock()
+	logger.Tracep(msg, kv...)
+}
+
+// Requestp forwards msg and the given pairs to the package logger at REQUEST
+// level, passing rlevel through as the request-specific level.
+func Requestp(rlevel Level, msg string, kv ...Pair) {
+	if skipLogging(REQUEST) {
+		return
+	}
+	loggerMutex.Lock()
+	defer loggerMutex.Unlock()
+	logger.Requestp(rlevel, msg, kv...)
+}
+
+// Infop forwards msg and the given pairs to the package logger at INFO level.
+func Infop(msg string, kv ...Pair) {
+	if skipLogging(INFO) {
+		return
+	}
+	loggerMutex.Lock()
+	defer loggerMutex.Unlock()
+	logger.Infop(msg, kv...)
+}
+
+// Warnp forwards msg and the given pairs to the package logger at WARN level.
+func Warnp(msg string, kv ...Pair) {
+	if skipLogging(WARN) {
+		return
+	}
+	loggerMutex.Lock()
+	defer loggerMutex.Unlock()
+	logger.Warnp(msg, kv...)
+}
+
+// Errorp forwards msg and the given pairs to the package logger at ERROR level.
+func Errorp(msg string, kv ...Pair) {
+	if skipLogging(ERROR) {
+		return
+	}
+	loggerMutex.Lock()
+	defer loggerMutex.Unlock()
+	logger.Errorp(msg, kv...)
+}
+
+// Severep forwards msg and the given pairs to the package logger at SEVERE level.
+func Severep(msg string, kv ...Pair) {
+	if skipLogging(SEVERE) {
+		return
+	}
+	loggerMutex.Lock()
+	defer loggerMutex.Unlock()
+	logger.Severep(msg, kv...)
+}
+
+// Fatalp forwards msg and the given pairs to the package logger at FATAL level.
+func Fatalp(msg string, kv ...Pair) {
+	if skipLogging(FATAL) {
+		return
+	}
+	loggerMutex.Lock()
+	defer loggerMutex.Unlock()
+	logger.Fatalp(msg, kv...)
+}
+
+// Logm forwards msg and the kv map to the package logger at the given level,
+// unless skipLogging reports that level as disabled.
+func Logm(level Level, msg string, kv Map) {
+	if skipLogging(level) {
+		return
+	}
+	loggerMutex.Lock()
+	defer loggerMutex.Unlock()
+	logger.Logm(level, msg, kv)
+}
+
+// Debugm forwards msg and the kv map to the package logger at DEBUG level.
+func Debugm(msg string, kv Map) {
+	if skipLogging(DEBUG) {
+		return
+	}
+	loggerMutex.Lock()
+	defer loggerMutex.Unlock()
+	logger.Debugm(msg, kv)
+}
+
+// Tracem forwards msg and the kv map to the package logger at TRACE level.
+func Tracem(msg string, kv Map) {
+	if skipLogging(TRACE) {
+		return
+	}
+	loggerMutex.Lock()
+	defer loggerMutex.Unlock()
+	logger.Tracem(msg, kv)
+}
+
+// Requestm forwards msg and the kv map to the package logger at REQUEST
+// level, passing rlevel through as the request-specific level.
+func Requestm(rlevel Level, msg string, kv Map) {
+	if skipLogging(REQUEST) {
+		return
+	}
+	loggerMutex.Lock()
+	defer loggerMutex.Unlock()
+	logger.Requestm(rlevel, msg, kv)
+}
+
+// Infom forwards msg and the kv map to the package logger at INFO level.
+func Infom(msg string, kv Map) {
+	if skipLogging(INFO) {
+		return
+	}
+	loggerMutex.Lock()
+	defer loggerMutex.Unlock()
+	logger.Infom(msg, kv)
+}
+
+// Warnm forwards msg and the kv map to the package logger at WARN level.
+func Warnm(msg string, kv Map) {
+	if skipLogging(WARN) {
+		return
+	}
+	loggerMutex.Lock()
+	defer loggerMutex.Unlock()
+	logger.Warnm(msg, kv)
+}
+
+// Errorm forwards msg and the kv map to the package logger at ERROR level.
+func Errorm(msg string, kv Map) {
+	if skipLogging(ERROR) {
+		return
+	}
+	loggerMutex.Lock()
+	defer loggerMutex.Unlock()
+	logger.Errorm(msg, kv)
+}
+
+// Severem forwards msg and the kv map to the package logger at SEVERE level.
+func Severem(msg string, kv Map) {
+	if skipLogging(SEVERE) {
+		return
+	}
+	loggerMutex.Lock()
+	defer loggerMutex.Unlock()
+	logger.Severem(msg, kv)
+}
+
+// Fatalm forwards msg and the kv map to the package logger at FATAL level.
+func Fatalm(msg string, kv Map) {
+	if skipLogging(FATAL) {
+		return
+	}
+	loggerMutex.Lock()
+	defer loggerMutex.Unlock()
+	logger.Fatalm(msg, kv)
+}
+
+// Logf forwards the format string and arguments to the package logger at the
+// given level, unless skipLogging reports that level as disabled.
+func Logf(level Level, fmt string, args ...interface{}) {
+	if skipLogging(level) {
+		return
+	}
+	loggerMutex.Lock()
+	defer loggerMutex.Unlock()
+	logger.Logf(level, fmt, args...)
+}
+
+// Debugf forwards the format string and arguments to the package logger at DEBUG level.
+func Debugf(fmt string, args ...interface{}) {
+	if skipLogging(DEBUG) {
+		return
+	}
+	loggerMutex.Lock()
+	defer loggerMutex.Unlock()
+	logger.Debugf(fmt, args...)
+}
+
+// Tracef forwards the format string and arguments to the package logger at TRACE level.
+func Tracef(fmt string, args ...interface{}) {
+	if skipLogging(TRACE) {
+		return
+	}
+	loggerMutex.Lock()
+	defer loggerMutex.Unlock()
+	logger.Tracef(fmt, args...)
+}
+
+// Requestf forwards the format string and arguments to the package logger at
+// REQUEST level, passing rlevel through as the request-specific level.
+func Requestf(rlevel Level, fmt string, args ...interface{}) {
+	if skipLogging(REQUEST) {
+		return
+	}
+	loggerMutex.Lock()
+	defer loggerMutex.Unlock()
+	logger.Requestf(rlevel, fmt, args...)
+}
+
+// Infof forwards the format string and arguments to the package logger at INFO level.
+func Infof(fmt string, args ...interface{}) {
+	if skipLogging(INFO) {
+		return
+	}
+	loggerMutex.Lock()
+	defer loggerMutex.Unlock()
+	logger.Infof(fmt, args...)
+}
+
+// Warnf forwards the format string and arguments to the package logger at WARN level.
+func Warnf(fmt string, args ...interface{}) {
+	if skipLogging(WARN) {
+		return
+	}
+	loggerMutex.Lock()
+	defer loggerMutex.Unlock()
+	logger.Warnf(fmt, args...)
+}
+
+// Errorf forwards the format string and arguments to the package logger at ERROR level.
+func Errorf(fmt string, args ...interface{}) {
+	if skipLogging(ERROR) {
+		return
+	}
+	loggerMutex.Lock()
+	defer loggerMutex.Unlock()
+	logger.Errorf(fmt, args...)
+}
+
+// Severef forwards the format string and arguments to the package logger at SEVERE level.
+func Severef(fmt string, args ...interface{}) {
+	if skipLogging(SEVERE) {
+		return
+	}
+	loggerMutex.Lock()
+	defer loggerMutex.Unlock()
+	logger.Severef(fmt, args...)
+}
+
+// Fatalf forwards the format string and arguments to the package logger at FATAL level.
+func Fatalf(fmt string, args ...interface{}) {
+	if skipLogging(FATAL) {
+		return
+	}
+	loggerMutex.Lock()
+	defer loggerMutex.Unlock()
+	logger.Fatalf(fmt, args...)
+}
+
+// SetLevel changes the level of the package logger and the cached level used
+// by skipLogging. It is a no-op when no logger is installed.
+func SetLevel(level Level) {
+	loggerMutex.Lock()
+	defer loggerMutex.Unlock()
+	if logger == nil {
+		// SetLogger accepts nil, so there may be nothing to configure;
+		// calling a method on the nil interface would panic.
+		return
+	}
+	logger.SetLevel(level)
+	curLevel = level
+}
+
+// LogLevel returns the level of the package logger, or NONE when no logger
+// is installed.
+func LogLevel() Level {
+	loggerMutex.RLock()
+	defer loggerMutex.RUnlock()
+	if logger == nil {
+		return NONE
+	}
+	return logger.Level()
+}
+
+// Stackf logs the formatted message at the given level, followed by the
+// calling goroutine's stack trace (truncated to 64 KiB).
+func Stackf(level Level, fmt string, args ...interface{}) {
+	if skipLogging(level) {
+		return
+	}
+	buf := make([]byte, 1<<16)
+	n := runtime.Stack(buf, false)
+	s := string(buf[0:n])
+	loggerMutex.Lock()
+	defer loggerMutex.Unlock()
+	logger.Logf(level, fmt, args...)
+	// The stack text is data, not a format string: pass it through "%s" so
+	// any '%' in it is printed literally.
+	logger.Logf(level, "%s", s)
+}
+
+// init installs a text-format logger writing to stderr at INFO level.
+func init() {
+	SetLogger(NewLogger(os.Stderr, INFO, TEXTFORMATTER))
+}
diff --git a/vendor/github.com/couchbase/goutils/logging/logger_golog.go b/vendor/github.com/couchbase/goutils/logging/logger_golog.go
new file mode 100644
index 0000000000..eec432a513
--- /dev/null
+++ b/vendor/github.com/couchbase/goutils/logging/logger_golog.go
@@ -0,0 +1,318 @@
+// Copyright (c) 2016 Couchbase, Inc.
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+// except in compliance with the License. You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software distributed under the
+// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+// either express or implied. See the License for the specific language governing permissions
+// and limitations under the License.
+
+package logging
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"io"
+	"log"
+	"time"
+)
+
+// goLogger is a Logger built on the standard library's log.Logger.
+type goLogger struct {
+	logger         *log.Logger // destination; a nil logger disables output
+	level          Level       // entries with a level above this are dropped
+	entryFormatter formatter   // renders a logEntry to the line that is printed
+}
+
+// Keys reserved for the logger's own fields.
+const (
+	_LEVEL  = "_level"
+	_MSG    = "_msg"
+	_TIME   = "_time"
+	_RLEVEL = "_rlevel"
+)
+
+// NewLogger returns a logger that writes to out at level lvl using the
+// requested entry format; any unrecognised format falls back to text.
+func NewLogger(out io.Writer, lvl Level, fmtLogging LogEntryFormatter) *goLogger {
+	logger := &goLogger{
+		logger: log.New(out, "", 0),
+		level:  lvl,
+	}
+	switch fmtLogging {
+	case JSONFORMATTER:
+		logger.entryFormatter = &jsonFormatter{}
+	case KVFORMATTER:
+		logger.entryFormatter = &keyvalueFormatter{}
+	default:
+		logger.entryFormatter = &textFormatter{}
+	}
+	return logger
+}
+
+// Logp writes msg and the given pairs when level is enabled for this logger.
+func (gl *goLogger) Logp(level Level, msg string, kv ...Pair) {
+	if gl.logger == nil {
+		return
+	}
+	if level <= gl.level {
+		e := newLogEntry(msg, level)
+		copyPairs(e, kv)
+		gl.log(e)
+	}
+}
+
+// Debugp is Logp at DEBUG level.
+func (gl *goLogger) Debugp(msg string, kv ...Pair) {
+	gl.Logp(DEBUG, msg, kv...)
+}
+
+// Tracep is Logp at TRACE level.
+func (gl *goLogger) Tracep(msg string, kv ...Pair) {
+	gl.Logp(TRACE, msg, kv...)
+}
+
+// Requestp writes msg and the given pairs at REQUEST level and records
+// rlevel on the entry as the request-specific level.
+func (gl *goLogger) Requestp(rlevel Level, msg string, kv ...Pair) {
+	if gl.logger == nil {
+		return
+	}
+	if REQUEST <= gl.level {
+		e := newLogEntry(msg, REQUEST)
+		e.Rlevel = rlevel
+		copyPairs(e, kv)
+		gl.log(e)
+	}
+}
+
+// Infop is Logp at INFO level.
+func (gl *goLogger) Infop(msg string, kv ...Pair) {
+	gl.Logp(INFO, msg, kv...)
+}
+
+// Warnp is Logp at WARN level.
+func (gl *goLogger) Warnp(msg string, kv ...Pair) {
+	gl.Logp(WARN, msg, kv...)
+}
+
+// Errorp is Logp at ERROR level.
+func (gl *goLogger) Errorp(msg string, kv ...Pair) {
+	gl.Logp(ERROR, msg, kv...)
+}
+
+// Severep is Logp at SEVERE level.
+func (gl *goLogger) Severep(msg string, kv ...Pair) {
+	gl.Logp(SEVERE, msg, kv...)
+}
+
+// Fatalp is Logp at FATAL level.
+func (gl *goLogger) Fatalp(msg string, kv ...Pair) {
+	gl.Logp(FATAL, msg, kv...)
+}
+
+// Logm writes msg and the kv map when level is enabled for this logger.
+// The entry keeps a reference to kv rather than a copy.
+func (gl *goLogger) Logm(level Level, msg string, kv Map) {
+	if gl.logger == nil {
+		return
+	}
+	if level <= gl.level {
+		e := newLogEntry(msg, level)
+		e.Data = kv
+		gl.log(e)
+	}
+}
+
+// Debugm is Logm at DEBUG level.
+func (gl *goLogger) Debugm(msg string, kv Map) {
+	gl.Logm(DEBUG, msg, kv)
+}
+
+// Tracem is Logm at TRACE level.
+func (gl *goLogger) Tracem(msg string, kv Map) {
+	gl.Logm(TRACE, msg, kv)
+}
+
+// Requestm writes msg and the kv map at REQUEST level and records rlevel on
+// the entry as the request-specific level. The entry keeps a reference to kv.
+func (gl *goLogger) Requestm(rlevel Level, msg string, kv Map) {
+	if gl.logger == nil {
+		return
+	}
+	if REQUEST <= gl.level {
+		e := newLogEntry(msg, REQUEST)
+		e.Rlevel = rlevel
+		e.Data = kv
+		gl.log(e)
+	}
+}
+
+// Infom is Logm at INFO level.
+func (gl *goLogger) Infom(msg string, kv Map) {
+	gl.Logm(INFO, msg, kv)
+}
+
+// Warnm is Logm at WARN level.
+func (gl *goLogger) Warnm(msg string, kv Map) {
+	gl.Logm(WARN, msg, kv)
+}
+
+// Errorm is Logm at ERROR level.
+func (gl *goLogger) Errorm(msg string, kv Map) {
+	gl.Logm(ERROR, msg, kv)
+}
+
+// Severem is Logm at SEVERE level.
+func (gl *goLogger) Severem(msg string, kv Map) {
+	gl.Logm(SEVERE, msg, kv)
+}
+
+// Fatalm is Logm at FATAL level.
+func (gl *goLogger) Fatalm(msg string, kv Map) {
+	gl.Logm(FATAL, msg, kv)
+}
+
+// Logf writes the formatted message when level is enabled for this logger.
+// The message is only formatted once the level check has passed.
+func (gl *goLogger) Logf(level Level, format string, args ...interface{}) {
+	if gl.logger == nil {
+		return
+	}
+	if level <= gl.level {
+		e := newLogEntry(fmt.Sprintf(format, args...), level)
+		gl.log(e)
+	}
+}
+
+// Debugf is Logf at DEBUG level.
+func (gl *goLogger) Debugf(format string, args
...interface{}) {
+	gl.Logf(DEBUG, format, args...)
+}
+
+// Tracef is Logf at TRACE level.
+func (gl *goLogger) Tracef(format string, args ...interface{}) {
+	gl.Logf(TRACE, format, args...)
+}
+
+// Requestf writes the formatted message at REQUEST level and records rlevel
+// on the entry as the request-specific level.
+func (gl *goLogger) Requestf(rlevel Level, format string, args ...interface{}) {
+	if gl.logger == nil {
+		return
+	}
+	if REQUEST <= gl.level {
+		e := newLogEntry(fmt.Sprintf(format, args...), REQUEST)
+		e.Rlevel = rlevel
+		gl.log(e)
+	}
+}
+
+// Infof is Logf at INFO level.
+func (gl *goLogger) Infof(format string, args ...interface{}) {
+	gl.Logf(INFO, format, args...)
+}
+
+// Warnf is Logf at WARN level.
+func (gl *goLogger) Warnf(format string, args ...interface{}) {
+	gl.Logf(WARN, format, args...)
+}
+
+// Errorf is Logf at ERROR level.
+func (gl *goLogger) Errorf(format string, args ...interface{}) {
+	gl.Logf(ERROR, format, args...)
+}
+
+// Severef is Logf at SEVERE level.
+func (gl *goLogger) Severef(format string, args ...interface{}) {
+	gl.Logf(SEVERE, format, args...)
+}
+
+// Fatalf is Logf at FATAL level. It only logs; nothing here exits the process.
+func (gl *goLogger) Fatalf(format string, args ...interface{}) {
+	gl.Logf(FATAL, format, args...)
+}
+
+// Level returns the logger's current level.
+func (gl *goLogger) Level() Level {
+	return gl.level
+}
+
+// SetLevel replaces the logger's level. The field is written without any
+// locking inside goLogger itself.
+func (gl *goLogger) SetLevel(level Level) {
+	gl.level = level
+}
+
+// log renders the entry with the configured formatter and prints it.
+func (gl *goLogger) log(newEntry *logEntry) {
+	s := gl.entryFormatter.format(newEntry)
+	gl.logger.Print(s)
+}
+
+// logEntry is a single log record handed to a formatter.
+type logEntry struct {
+	Time    string // timestamp, already formatted
+	Level   Level  // level the entry is logged at
+	Rlevel  Level  // request-specific level; NONE when unset
+	Message string // log message
+	Data    Map    // additional key-value fields; may be nil
+}
+
+// newLogEntry creates an entry stamped with the current time, with no
+// request level and no data.
+func newLogEntry(msg string, level Level) *logEntry {
+	return &logEntry{
+		Time:    time.Now().Format("2006-01-02T15:04:05.000-07:00"), // time.RFC3339 with milliseconds
+		Level:   level,
+		Rlevel:  NONE,
+		Message: msg,
+	}
+}
+
+// copyPairs replaces the entry's Data with a new Map built from pairs; when a
+// name repeats, the last value wins.
+func copyPairs(newEntry *logEntry, pairs []Pair) {
+	newEntry.Data = make(Map, len(pairs))
+	for _, p := range pairs {
+		newEntry.Data[p.Name] = p.Value
+	}
+}
+
+// formatter renders a logEntry as the text that gets printed.
+type formatter interface {
+	format(*logEntry) string
+}
+
+// textFormatter renders entries as plain text.
+type textFormatter struct {
+}
+
+// ex.
2016-02-10T09:15:25.498-08:00 [INFO] This is a message from test in text format
+
+// format renders the entry as "<time> [<level>] <message> key=value ..."
+// followed by a newline; the level is written as "[LEVEL,RLEVEL]" when a
+// request level is set. Data is a map, so the order of the key=value pairs
+// is not stable between calls.
+func (*textFormatter) format(newEntry *logEntry) string {
+	b := &bytes.Buffer{}
+	appendValue(b, newEntry.Time)
+	if newEntry.Rlevel != NONE {
+		fmt.Fprintf(b, "[%s,%s] ", newEntry.Level.String(), newEntry.Rlevel.String())
+	} else {
+		fmt.Fprintf(b, "[%s] ", newEntry.Level.String())
+	}
+	appendValue(b, newEntry.Message)
+	for key, value := range newEntry.Data {
+		appendKeyValue(b, key, value)
+	}
+	b.WriteByte('\n')
+	// The buffer already holds the full line; no second buffer is needed.
+	return b.String()
+}
+
+// appendValue writes value followed by a single space.
+func appendValue(b *bytes.Buffer, value interface{}) {
+	if _, ok := value.(string); ok {
+		fmt.Fprintf(b, "%s ", value)
+	} else {
+		fmt.Fprintf(b, "%v ", value)
+	}
+}
+
+// keyvalueFormatter renders entries as space-separated key=value pairs.
+type keyvalueFormatter struct {
+}
+
+// ex. _time=2016-02-10T09:15:25.498-08:00 _level=INFO _msg=This is a message from test in key-value format
+
+// format writes the logger's own keys first (_time, _level, _rlevel when a
+// request level is set, _msg), then the entry's Data in map iteration order,
+// then a newline.
+func (*keyvalueFormatter) format(newEntry *logEntry) string {
+	b := &bytes.Buffer{}
+	appendKeyValue(b, _TIME, newEntry.Time)
+	appendKeyValue(b, _LEVEL, newEntry.Level.String())
+	if newEntry.Rlevel != NONE {
+		appendKeyValue(b, _RLEVEL, newEntry.Rlevel.String())
+	}
+	appendKeyValue(b, _MSG, newEntry.Message)
+	for key, value := range newEntry.Data {
+		appendKeyValue(b, key, value)
+	}
+	b.WriteByte('\n')
+	// The buffer already holds the full line; no second buffer is needed.
+	return b.String()
+}
+
+// appendKeyValue writes "key=value" followed by a single space.
+func appendKeyValue(b *bytes.Buffer, key, value interface{}) {
+	if _, ok := value.(string); ok {
+		fmt.Fprintf(b, "%v=%s ", key, value)
+	} else {
+		fmt.Fprintf(b, "%v=%v ", key, value)
+	}
+}
+
+// jsonFormatter renders entries as one JSON object per line.
+type jsonFormatter struct {
+}
+
+// ex.
{"_level":"INFO","_msg":"This is a message from test in json format","_time":"2016-02-10T09:12:59.518-08:00"}
+
+// format renders the entry as a single-line JSON object holding the entry's
+// Data plus the logger's own keys (_time, _level, _msg, and _rlevel when a
+// request level is set), followed by a newline. Logger keys overwrite Data
+// keys of the same name.
+func (*jsonFormatter) format(newEntry *logEntry) string {
+	// Work on a private copy: Logm and Requestm store the caller's Map on the
+	// entry by reference, so writing the logger keys into newEntry.Data would
+	// modify the caller's map.
+	data := make(Map, len(newEntry.Data)+4)
+	for key, value := range newEntry.Data {
+		data[key] = value
+	}
+	data[_TIME] = newEntry.Time
+	data[_LEVEL] = newEntry.Level.String()
+	if newEntry.Rlevel != NONE {
+		data[_RLEVEL] = newEntry.Rlevel.String()
+	}
+	data[_MSG] = newEntry.Message
+
+	serialized, err := json.Marshal(data)
+	if err != nil {
+		// A caller-supplied value could not be encoded. Rather than emit an
+		// empty line, fall back to the logger's own keys only.
+		reserved := Map{
+			_TIME:  newEntry.Time,
+			_LEVEL: newEntry.Level.String(),
+			_MSG:   newEntry.Message,
+		}
+		if newEntry.Rlevel != NONE {
+			reserved[_RLEVEL] = newEntry.Rlevel.String()
+		}
+		// Every value is a plain string, so this Marshal cannot fail.
+		serialized, _ = json.Marshal(reserved)
+	}
+	return string(append(serialized, '\n'))
+}
diff --git a/vendor/github.com/couchbase/goutils/scramsha/scramsha.go b/vendor/github.com/couchbase/goutils/scramsha/scramsha.go
new file mode 100644
index 0000000000..b234bfc8a9
--- /dev/null
+++ b/vendor/github.com/couchbase/goutils/scramsha/scramsha.go
@@ -0,0 +1,207 @@
+// @author Couchbase <info@couchbase.com>
+// @copyright 2018 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +// Package scramsha provides implementation of client side SCRAM-SHA +// according to https://tools.ietf.org/html/rfc5802 +package scramsha + +import ( + "crypto/hmac" + "crypto/rand" + "crypto/sha1" + "crypto/sha256" + "crypto/sha512" + "encoding/base64" + "fmt" + "github.com/pkg/errors" + "golang.org/x/crypto/pbkdf2" + "hash" + "strconv" + "strings" +) + +func hmacHash(message []byte, secret []byte, hashFunc func() hash.Hash) []byte { + h := hmac.New(hashFunc, secret) + h.Write(message) + return h.Sum(nil) +} + +func shaHash(message []byte, hashFunc func() hash.Hash) []byte { + h := hashFunc() + h.Write(message) + return h.Sum(nil) +} + +func generateClientNonce(size int) (string, error) { + randomBytes := make([]byte, size) + _, err := rand.Read(randomBytes) + if err != nil { + return "", errors.Wrap(err, "Unable to generate nonce") + } + return base64.StdEncoding.EncodeToString(randomBytes), nil +} + +// ScramSha provides context for SCRAM-SHA handling +type ScramSha struct { + hashSize int + hashFunc func() hash.Hash + clientNonce string + serverNonce string + salt []byte + i int + saltedPassword []byte + authMessage string +} + +var knownMethods = []string{"SCRAM-SHA512", "SCRAM-SHA256", "SCRAM-SHA1"} + +// BestMethod returns SCRAM-SHA method we consider the best out of suggested +// by server +func BestMethod(methods string) (string, error) { + for _, m := range knownMethods { + if strings.Index(methods, m) != -1 { + return m, nil + } + } + return "", errors.Errorf( + "None of the server suggested methods [%s] are supported", + methods) +} + +// NewScramSha creates context for SCRAM-SHA handling +func NewScramSha(method string) (*ScramSha, error) { + s := &ScramSha{} + + if method == knownMethods[0] { + s.hashFunc = sha512.New + s.hashSize = 64 + } else if method == knownMethods[1] { + s.hashFunc = sha256.New + s.hashSize = 32 + } else if method == knownMethods[2] { + s.hashFunc = sha1.New + s.hashSize = 20 + } else { + return nil, 
errors.Errorf("Unsupported method %s", method) + } + return s, nil +} + +// GetStartRequest builds start SCRAM-SHA request to be sent to server +func (s *ScramSha) GetStartRequest(user string) (string, error) { + var err error + s.clientNonce, err = generateClientNonce(24) + if err != nil { + return "", errors.Wrapf(err, "Unable to generate SCRAM-SHA "+ + "start request for user %s", user) + } + + message := fmt.Sprintf("n,,n=%s,r=%s", user, s.clientNonce) + s.authMessage = message[3:] + return message, nil +} + +// HandleStartResponse handles server response on start SCRAM-SHA request +func (s *ScramSha) HandleStartResponse(response string) error { + parts := strings.Split(response, ",") + if len(parts) != 3 { + return errors.Errorf("expected 3 fields in first SCRAM-SHA-1 "+ + "server message %s", response) + } + if !strings.HasPrefix(parts[0], "r=") || len(parts[0]) < 3 { + return errors.Errorf("Server sent an invalid nonce %s", + parts[0]) + } + if !strings.HasPrefix(parts[1], "s=") || len(parts[1]) < 3 { + return errors.Errorf("Server sent an invalid salt %s", parts[1]) + } + if !strings.HasPrefix(parts[2], "i=") || len(parts[2]) < 3 { + return errors.Errorf("Server sent an invalid iteration count %s", + parts[2]) + } + + s.serverNonce = parts[0][2:] + encodedSalt := parts[1][2:] + var err error + s.i, err = strconv.Atoi(parts[2][2:]) + if err != nil { + return errors.Errorf("Iteration count %s must be integer.", + parts[2][2:]) + } + + if s.i < 1 { + return errors.New("Iteration count should be positive") + } + + if !strings.HasPrefix(s.serverNonce, s.clientNonce) { + return errors.Errorf("Server nonce %s doesn't contain client"+ + " nonce %s", s.serverNonce, s.clientNonce) + } + + s.salt, err = base64.StdEncoding.DecodeString(encodedSalt) + if err != nil { + return errors.Wrapf(err, "Unable to decode salt %s", + encodedSalt) + } + + s.authMessage = s.authMessage + "," + response + return nil +} + +// GetFinalRequest builds final SCRAM-SHA request to be sent 
to server +func (s *ScramSha) GetFinalRequest(pass string) string { + clientFinalMessageBare := "c=biws,r=" + s.serverNonce + s.authMessage = s.authMessage + "," + clientFinalMessageBare + + s.saltedPassword = pbkdf2.Key([]byte(pass), s.salt, s.i, + s.hashSize, s.hashFunc) + + clientKey := hmacHash([]byte("Client Key"), s.saltedPassword, s.hashFunc) + storedKey := shaHash(clientKey, s.hashFunc) + clientSignature := hmacHash([]byte(s.authMessage), storedKey, s.hashFunc) + + clientProof := make([]byte, len(clientSignature)) + for i := 0; i < len(clientSignature); i++ { + clientProof[i] = clientKey[i] ^ clientSignature[i] + } + + return clientFinalMessageBare + ",p=" + + base64.StdEncoding.EncodeToString(clientProof) +} + +// HandleFinalResponse handles server's response on final SCRAM-SHA request +func (s *ScramSha) HandleFinalResponse(response string) error { + if strings.Contains(response, ",") || + !strings.HasPrefix(response, "v=") { + return errors.Errorf("Server sent an invalid final message %s", + response) + } + + decodedMessage, err := base64.StdEncoding.DecodeString(response[2:]) + if err != nil { + return errors.Wrapf(err, "Unable to decode server message %s", + response[2:]) + } + serverKey := hmacHash([]byte("Server Key"), s.saltedPassword, + s.hashFunc) + serverSignature := hmacHash([]byte(s.authMessage), serverKey, + s.hashFunc) + if string(decodedMessage) != string(serverSignature) { + return errors.Errorf("Server proof %s doesn't match "+ + "the expected: %s", + string(decodedMessage), string(serverSignature)) + } + return nil +} diff --git a/vendor/github.com/couchbase/goutils/scramsha/scramsha_http.go b/vendor/github.com/couchbase/goutils/scramsha/scramsha_http.go new file mode 100644 index 0000000000..19f32b3134 --- /dev/null +++ b/vendor/github.com/couchbase/goutils/scramsha/scramsha_http.go @@ -0,0 +1,252 @@ +// @author Couchbase <info@couchbase.com> +// @copyright 2018 Couchbase, Inc. 
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package scramsha provides implementation of client side SCRAM-SHA
+// via Http according to https://tools.ietf.org/html/rfc7804
+package scramsha
+
+import (
+	"encoding/base64"
+	"github.com/pkg/errors"
+	"io"
+	"io/ioutil"
+	"net/http"
+	"strings"
+)
+
+// consts used to parse scramsha response from target
+const (
+	WWWAuthenticate    = "WWW-Authenticate"    // response header carrying the server's first message
+	AuthenticationInfo = "Authentication-Info" // response header carrying the server's final message
+	Authorization      = "Authorization"       // request header carrying the client's messages
+	DataPrefix         = "data="               // introduces the base64-encoded SCRAM message
+	SidPrefix          = "sid="                // introduces the session id
+)
+
+// Request provides implementation of http request that can be retried
+type Request struct {
+	// body is kept as a ReadSeeker so it can be rewound before the request
+	// is sent a second time.
+	body io.ReadSeeker
+
+	// Embed an HTTP request directly. This makes a *Request act exactly
+	// like an *http.Request so that all meta methods are supported.
+	*http.Request
+}
+
+// lenReader is satisfied by readers that can report a length; NewRequest
+// uses it to fill in Content-Length.
+type lenReader interface {
+	Len() int
+}
+
+// NewRequest creates http request that can be retried
+func NewRequest(method, url string, body io.ReadSeeker) (*Request, error) {
+	// Wrap the body in a noop ReadCloser if non-nil. This prevents the
+	// reader from being closed by the HTTP client.
+	var rcBody io.ReadCloser
+	if body != nil {
+		rcBody = ioutil.NopCloser(body)
+	}
+
+	// Make the request with the noop-closer for the body.
+	httpReq, err := http.NewRequest(method, url, rcBody)
+	if err != nil {
+		return nil, err
+	}
+
+	// Check if we can set the Content-Length automatically.
+ if lr, ok := body.(lenReader); ok { + httpReq.ContentLength = int64(lr.Len()) + } + + return &Request{body, httpReq}, nil +} + +func encode(str string) string { + return base64.StdEncoding.EncodeToString([]byte(str)) +} + +func decode(str string) (string, error) { + bytes, err := base64.StdEncoding.DecodeString(str) + if err != nil { + return "", errors.Errorf("Cannot base64 decode %s", + str) + } + return string(bytes), err +} + +func trimPrefix(s, prefix string) (string, error) { + l := len(s) + trimmed := strings.TrimPrefix(s, prefix) + if l == len(trimmed) { + return trimmed, errors.Errorf("Prefix %s not found in %s", + prefix, s) + } + return trimmed, nil +} + +func drainBody(resp *http.Response) { + defer resp.Body.Close() + io.Copy(ioutil.Discard, resp.Body) +} + +// DoScramSha performs SCRAM-SHA handshake via Http +func DoScramSha(req *Request, + username string, + password string, + client *http.Client) (*http.Response, error) { + + method := "SCRAM-SHA-512" + s, err := NewScramSha("SCRAM-SHA512") + if err != nil { + return nil, errors.Wrap(err, + "Unable to initialize SCRAM-SHA handler") + } + + message, err := s.GetStartRequest(username) + if err != nil { + return nil, err + } + + encodedMessage := method + " " + DataPrefix + encode(message) + + req.Header.Set(Authorization, encodedMessage) + + res, err := client.Do(req.Request) + if err != nil { + return nil, errors.Wrap(err, "Problem sending SCRAM-SHA start"+ + "request") + } + + if res.StatusCode != http.StatusUnauthorized { + return res, nil + } + + authHeader := res.Header.Get(WWWAuthenticate) + if authHeader == "" { + drainBody(res) + return nil, errors.Errorf("Header %s is not populated in "+ + "SCRAM-SHA start response", WWWAuthenticate) + } + + authHeader, err = trimPrefix(authHeader, method+" ") + if err != nil { + if strings.HasPrefix(authHeader, "Basic ") { + // user not found + return res, nil + } + drainBody(res) + return nil, errors.Wrapf(err, "Error while parsing SCRAM-SHA "+ + "start 
response %s", authHeader)
+	}
+
+	// The challenge header has been captured; release the first response so
+	// its connection can be reused.
+	drainBody(res)
+
+	sid, response, err := parseSidAndData(authHeader)
+	if err != nil {
+		return nil, errors.Wrapf(err, "Error while parsing SCRAM-SHA "+
+			"start response %s", authHeader)
+	}
+
+	err = s.HandleStartResponse(response)
+	if err != nil {
+		return nil, errors.Wrapf(err, "Error parsing SCRAM-SHA start "+
+			"response %s", response)
+	}
+
+	message = s.GetFinalRequest(password)
+	encodedMessage = method + " " + SidPrefix + sid + "," + DataPrefix +
+		encode(message)
+
+	req.Header.Set(Authorization, encodedMessage)
+
+	// rewind request body so it can be resent again
+	if req.body != nil {
+		if _, err = req.body.Seek(0, 0); err != nil {
+			return nil, errors.Errorf("Failed to seek body: %v",
+				err)
+		}
+	}
+
+	res, err = client.Do(req.Request)
+	if err != nil {
+		return nil, errors.Wrap(err, "Problem sending SCRAM-SHA final "+
+			"request")
+	}
+
+	if res.StatusCode == http.StatusUnauthorized {
+		// TODO retrieve and return error
+		return res, nil
+	}
+
+	if res.StatusCode >= http.StatusInternalServerError {
+		// in this case we cannot expect server to set headers properly
+		return res, nil
+	}
+
+	authHeader = res.Header.Get(AuthenticationInfo)
+	if authHeader == "" {
+		drainBody(res)
+		return nil, errors.Errorf("Header %s is not populated in "+
+			"SCRAM-SHA final response", AuthenticationInfo)
+	}
+
+	finalSid, response, err := parseSidAndData(authHeader)
+	if err != nil {
+		drainBody(res)
+		return nil, errors.Wrapf(err, "Error while parsing SCRAM-SHA "+
+			"final response %s", authHeader)
+	}
+
+	// The server must answer within the same session it opened.
+	if finalSid != sid {
+		drainBody(res)
+		return nil, errors.Errorf("Sid %s returned by server "+
+			"doesn't match the original sid %s", finalSid, sid)
+	}
+
+	err = s.HandleFinalResponse(response)
+	if err != nil {
+		drainBody(res)
+		return nil, errors.Wrapf(err,
+			"Error handling SCRAM-SHA final server response %s",
+			response)
+	}
+	return res, nil
+}
+
+// parseSidAndData extracts the session id and the base64-decoded data field
+// from a SCRAM-SHA authentication header value.
+func parseSidAndData(authHeader string) (string, string, error) {
+	sidIndex
:= strings.Index(authHeader, SidPrefix) + if sidIndex < 0 { + return "", "", errors.Errorf("Cannot find %s in %s", + SidPrefix, authHeader) + } + + sidEndIndex := strings.Index(authHeader, ",") + if sidEndIndex < 0 { + return "", "", errors.Errorf("Cannot find ',' in %s", + authHeader) + } + + sid := authHeader[sidIndex+len(SidPrefix) : sidEndIndex] + + dataIndex := strings.Index(authHeader, DataPrefix) + if dataIndex < 0 { + return "", "", errors.Errorf("Cannot find %s in %s", + DataPrefix, authHeader) + } + + data, err := decode(authHeader[dataIndex+len(DataPrefix):]) + if err != nil { + return "", "", err + } + return sid, data, nil +} diff --git a/vendor/github.com/couchbaselabs/go-couchbase/LICENSE b/vendor/github.com/couchbaselabs/go-couchbase/LICENSE new file mode 100644 index 0000000000..0b23ef358e --- /dev/null +++ b/vendor/github.com/couchbaselabs/go-couchbase/LICENSE @@ -0,0 +1,19 @@ +Copyright (c) 2013 Couchbase, Inc. + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/vendor/github.com/couchbaselabs/go-couchbase/audit.go b/vendor/github.com/couchbaselabs/go-couchbase/audit.go new file mode 100644 index 0000000000..3db7d9f9ff --- /dev/null +++ b/vendor/github.com/couchbaselabs/go-couchbase/audit.go @@ -0,0 +1,32 @@ +package couchbase + +import () + +// Sample data: +// {"disabled":["12333", "22244"],"uid":"132492431","auditdEnabled":true, +// "disabledUsers":[{"name":"bill","domain":"local"},{"name":"bob","domain":"local"}], +// "logPath":"/Users/johanlarson/Library/Application Support/Couchbase/var/lib/couchbase/logs", +// "rotateInterval":86400,"rotateSize":20971520} +type AuditSpec struct { + Disabled []uint32 `json:"disabled"` + Uid string `json:"uid"` + AuditdEnabled bool `json:"auditdEnabled` + DisabledUsers []AuditUser `json:"disabledUsers"` + LogPath string `json:"logPath"` + RotateInterval int64 `json:"rotateInterval"` + RotateSize int64 `json:"rotateSize"` +} + +type AuditUser struct { + Name string `json:"name"` + Domain string `json:"domain"` +} + +func (c *Client) GetAuditSpec() (*AuditSpec, error) { + ret := &AuditSpec{} + err := c.parseURLResponse("/settings/audit", ret) + if err != nil { + return nil, err + } + return ret, nil +} diff --git a/vendor/github.com/couchbaselabs/go-couchbase/client.go b/vendor/github.com/couchbaselabs/go-couchbase/client.go new file mode 100644 index 0000000000..43c3382960 --- /dev/null +++ b/vendor/github.com/couchbaselabs/go-couchbase/client.go @@ -0,0 +1,1385 @@ +/* +Package couchbase provides a smart client for go. + +Usage: + + client, err := couchbase.Connect("http://myserver:8091/") + handleError(err) + pool, err := client.GetPool("default") + handleError(err) + bucket, err := pool.GetBucket("MyAwesomeBucket") + handleError(err) + ... 
+ +or a shortcut for the bucket directly + + bucket, err := couchbase.GetBucket("http://myserver:8091/", "default", "default") + +in any case, you can specify authentication credentials using +standard URL userinfo syntax: + + b, err := couchbase.GetBucket("http://bucketname:bucketpass@myserver:8091/", + "default", "bucket") +*/ +package couchbase + +import ( + "encoding/binary" + "encoding/json" + "errors" + "fmt" + "io" + "runtime" + "strconv" + "strings" + "sync" + "time" + "unsafe" + + "github.com/couchbase/gomemcached" + "github.com/couchbase/gomemcached/client" // package name is 'memcached' + "github.com/couchbase/goutils/logging" +) + +// Mutation Token +type MutationToken struct { + VBid uint16 // vbucket id + Guard uint64 // vbuuid + Value uint64 // sequence number +} + +// Maximum number of times to retry a chunk of a bulk get on error. +var MaxBulkRetries = 5000 +var backOffDuration time.Duration = 100 * time.Millisecond +var MaxBackOffRetries = 25 // exponentail backOff result in over 30sec (25*13*0.1s) + +// If this is set to a nonzero duration, Do() and ViewCustom() will log a warning if the call +// takes longer than that. +var SlowServerCallWarningThreshold time.Duration + +func slowLog(startTime time.Time, format string, args ...interface{}) { + if elapsed := time.Now().Sub(startTime); elapsed > SlowServerCallWarningThreshold { + pc, _, _, _ := runtime.Caller(2) + caller := runtime.FuncForPC(pc).Name() + logging.Infof("go-couchbase: "+format+" in "+caller+" took "+elapsed.String(), args...) + } +} + +// Return true if error is KEY_ENOENT. Required by cbq-engine +func IsKeyEExistsError(err error) bool { + + res, ok := err.(*gomemcached.MCResponse) + if ok && res.Status == gomemcached.KEY_EEXISTS { + return true + } + + return false +} + +// Return true if error is KEY_ENOENT. 
Required by cbq-engine +func IsKeyNoEntError(err error) bool { + + res, ok := err.(*gomemcached.MCResponse) + if ok && res.Status == gomemcached.KEY_ENOENT { + return true + } + + return false +} + +// Return true if error suggests a bucket refresh is required. Required by cbq-engine +func IsRefreshRequired(err error) bool { + + res, ok := err.(*gomemcached.MCResponse) + if ok && (res.Status == gomemcached.NO_BUCKET || res.Status == gomemcached.NOT_MY_VBUCKET) { + return true + } + + return false +} + +// ClientOpCallback is called for each invocation of Do. +var ClientOpCallback func(opname, k string, start time.Time, err error) + +// Do executes a function on a memcached connection to the node owning key "k" +// +// Note that this automatically handles transient errors by replaying +// your function on a "not-my-vbucket" error, so don't assume +// your command will only be executed only once. +func (b *Bucket) Do(k string, f func(mc *memcached.Client, vb uint16) error) (err error) { + return b.Do2(k, f, true) +} + +func (b *Bucket) Do2(k string, f func(mc *memcached.Client, vb uint16) error, deadline bool) (err error) { + if SlowServerCallWarningThreshold > 0 { + defer slowLog(time.Now(), "call to Do(%q)", k) + } + + vb := b.VBHash(k) + maxTries := len(b.Nodes()) * 2 + for i := 0; i < maxTries; i++ { + conn, pool, err := b.getConnectionToVBucket(vb) + if err != nil { + if isConnError(err) && backOff(i, maxTries, backOffDuration, true) { + b.Refresh() + continue + } + return err + } + + if deadline && DefaultTimeout > 0 { + conn.SetDeadline(getDeadline(noDeadline, DefaultTimeout)) + err = f(conn, uint16(vb)) + conn.SetDeadline(noDeadline) + } else { + err = f(conn, uint16(vb)) + } + + var retry bool + discard := isOutOfBoundsError(err) + + // MB-30967 / MB-31001 implement back off for transient errors + if resp, ok := err.(*gomemcached.MCResponse); ok { + switch resp.Status { + case gomemcached.NOT_MY_VBUCKET: + b.Refresh() + // MB-28842: in case of NMVB, check if 
the node is still part of the map + // and ditch the connection if it isn't. + discard = b.checkVBmap(pool.Node()) + retry = true + case gomemcached.NOT_SUPPORTED: + discard = true + retry = true + case gomemcached.ENOMEM: + fallthrough + case gomemcached.TMPFAIL: + retry = backOff(i, maxTries, backOffDuration, true) + default: + retry = false + } + } else if err != nil && isConnError(err) && backOff(i, maxTries, backOffDuration, true) { + retry = true + } + + if discard { + pool.Discard(conn) + } else { + pool.Return(conn) + } + + if !retry { + return err + } + } + + return fmt.Errorf("unable to complete action after %v attemps", maxTries) +} + +type GatheredStats struct { + Server string + Stats map[string]string + Err error +} + +func getStatsParallel(sn string, b *Bucket, offset int, which string, + ch chan<- GatheredStats) { + pool := b.getConnPool(offset) + var gatheredStats GatheredStats + + conn, err := pool.Get() + defer func() { + pool.Return(conn) + ch <- gatheredStats + }() + + if err != nil { + gatheredStats = GatheredStats{Server: sn, Err: err} + } else { + sm, err := conn.StatsMap(which) + gatheredStats = GatheredStats{Server: sn, Stats: sm, Err: err} + } +} + +// GetStats gets a set of stats from all servers. +// +// Returns a map of server ID -> map of stat key to map value. +func (b *Bucket) GetStats(which string) map[string]map[string]string { + rv := map[string]map[string]string{} + for server, gs := range b.GatherStats(which) { + if len(gs.Stats) > 0 { + rv[server] = gs.Stats + } + } + return rv +} + +// GatherStats returns a map of server ID -> GatheredStats from all servers. +func (b *Bucket) GatherStats(which string) map[string]GatheredStats { + vsm := b.VBServerMap() + if vsm.ServerList == nil { + return nil + } + + // Go grab all the things at once. 
+ ch := make(chan GatheredStats, len(vsm.ServerList)) + for i, sn := range vsm.ServerList { + go getStatsParallel(sn, b, i, which, ch) + } + + // Gather the results + rv := map[string]GatheredStats{} + for range vsm.ServerList { + gs := <-ch + rv[gs.Server] = gs + } + return rv +} + +// Get bucket count through the bucket stats +func (b *Bucket) GetCount(refresh bool) (count int64, err error) { + if refresh { + b.Refresh() + } + + var cnt int64 + for _, gs := range b.GatherStats("") { + if len(gs.Stats) > 0 { + cnt, err = strconv.ParseInt(gs.Stats["curr_items"], 10, 64) + if err != nil { + return 0, err + } + count += cnt + } + } + + return count, nil +} + +func isAuthError(err error) bool { + estr := err.Error() + return strings.Contains(estr, "Auth failure") +} + +func IsReadTimeOutError(err error) bool { + estr := err.Error() + return strings.Contains(estr, "read tcp") || + strings.Contains(estr, "i/o timeout") +} + +func isTimeoutError(err error) bool { + estr := err.Error() + return strings.Contains(estr, "i/o timeout") || + strings.Contains(estr, "connection timed out") || + strings.Contains(estr, "no route to host") +} + +// Errors that are not considered fatal for our fetch loop +func isConnError(err error) bool { + if err == io.EOF { + return true + } + estr := err.Error() + return strings.Contains(estr, "broken pipe") || + strings.Contains(estr, "connection reset") || + strings.Contains(estr, "connection refused") || + strings.Contains(estr, "connection pool is closed") +} + +func isOutOfBoundsError(err error) bool { + return err != nil && strings.Contains(err.Error(), "Out of Bounds error") + +} + +func getDeadline(reqDeadline time.Time, duration time.Duration) time.Time { + if reqDeadline.IsZero() && duration > 0 { + return time.Now().Add(duration) + } + return reqDeadline +} + +func backOff(attempt, maxAttempts int, duration time.Duration, exponential bool) bool { + if attempt < maxAttempts { + // 0th attempt return immediately + if attempt > 0 { + if 
exponential { + duration = time.Duration(attempt) * duration + } + time.Sleep(duration) + } + return true + } + + return false +} + +func (b *Bucket) doBulkGet(vb uint16, keys []string, reqDeadline time.Time, + ch chan<- map[string]*gomemcached.MCResponse, ech chan<- error, subPaths []string, + eStatus *errorStatus) { + if SlowServerCallWarningThreshold > 0 { + defer slowLog(time.Now(), "call to doBulkGet(%d, %d keys)", vb, len(keys)) + } + + rv := _STRING_MCRESPONSE_POOL.Get() + attempts := 0 + backOffAttempts := 0 + done := false + bname := b.Name + for ; attempts < MaxBulkRetries && !done && !eStatus.errStatus; attempts++ { + + if len(b.VBServerMap().VBucketMap) < int(vb) { + //fatal + err := fmt.Errorf("vbmap smaller than requested for %v", bname) + logging.Errorf("go-couchbase: %v vb %d vbmap len %d", err.Error(), vb, len(b.VBServerMap().VBucketMap)) + ech <- err + return + } + + masterID := b.VBServerMap().VBucketMap[vb][0] + + if masterID < 0 { + // fatal + err := fmt.Errorf("No master node available for %v vb %d", bname, vb) + logging.Errorf("%v", err.Error()) + ech <- err + return + } + + // This stack frame exists to ensure we can clean up + // connection at a reasonable time. 
+ err := func() error { + pool := b.getConnPool(masterID) + conn, err := pool.Get() + if err != nil { + if isAuthError(err) || isTimeoutError(err) { + logging.Errorf("Fatal Error %v : %v", bname, err) + ech <- err + return err + } else if isConnError(err) { + if !backOff(backOffAttempts, MaxBackOffRetries, backOffDuration, true) { + logging.Errorf("Connection Error %v : %v", bname, err) + ech <- err + return err + } + b.Refresh() + backOffAttempts++ + } + logging.Infof("Pool Get returned %v: %v", bname, err) + // retry + return nil + } + + conn.SetDeadline(getDeadline(reqDeadline, DefaultTimeout)) + err = conn.GetBulk(vb, keys, rv, subPaths) + conn.SetDeadline(noDeadline) + + discard := false + defer func() { + if discard { + pool.Discard(conn) + } else { + pool.Return(conn) + } + }() + + switch err.(type) { + case *gomemcached.MCResponse: + notSMaxTries := len(b.Nodes()) * 2 + st := err.(*gomemcached.MCResponse).Status + if st == gomemcached.NOT_MY_VBUCKET || (st == gomemcached.NOT_SUPPORTED && attempts < notSMaxTries) { + b.Refresh() + discard = b.checkVBmap(pool.Node()) + return nil // retry + } else if st == gomemcached.EBUSY || st == gomemcached.LOCKED { + if (attempts % (MaxBulkRetries / 100)) == 0 { + logging.Infof("Retrying Memcached error (%v) FOR %v(vbid:%d, keys:<ud>%v</ud>)", + err.Error(), bname, vb, keys) + } + return nil // retry + } else if (st == gomemcached.ENOMEM || st == gomemcached.TMPFAIL) && backOff(backOffAttempts, MaxBackOffRetries, backOffDuration, true) { + // MB-30967 / MB-31001 use backoff for TMPFAIL too + backOffAttempts++ + logging.Infof("Retrying Memcached error (%v) FOR %v(vbid:%d, keys:<ud>%v</ud>)", + err.Error(), bname, vb, keys) + return nil // retry + } + ech <- err + return err + case error: + if isOutOfBoundsError(err) { + // We got an out of bound error, retry the operation + discard = true + return nil + } else if isConnError(err) && backOff(backOffAttempts, MaxBackOffRetries, backOffDuration, true) { + backOffAttempts++ + 
logging.Errorf("Connection Error: %s. Refreshing bucket %v (vbid:%v,keys:<ud>%v</ud>)", + err.Error(), bname, vb, keys) + discard = true + b.Refresh() + return nil // retry + } + ech <- err + ch <- rv + return err + } + + done = true + return nil + }() + + if err != nil { + return + } + } + + if attempts >= MaxBulkRetries { + err := fmt.Errorf("bulkget exceeded MaxBulkRetries for %v(vbid:%d,keys:<ud>%v</ud>)", bname, vb, keys) + logging.Errorf("%v", err.Error()) + ech <- err + } + + ch <- rv +} + +type errorStatus struct { + errStatus bool +} + +type vbBulkGet struct { + b *Bucket + ch chan<- map[string]*gomemcached.MCResponse + ech chan<- error + k uint16 + keys []string + reqDeadline time.Time + wg *sync.WaitGroup + subPaths []string + groupError *errorStatus +} + +const _NUM_CHANNELS = 5 + +var _NUM_CHANNEL_WORKERS = (runtime.NumCPU() + 1) / 2 +var DefaultDialTimeout = time.Duration(0) +var DefaultTimeout = time.Duration(0) +var noDeadline = time.Time{} + +// Buffer 4k requests per worker +var _VB_BULK_GET_CHANNELS []chan *vbBulkGet + +func InitBulkGet() { + + DefaultDialTimeout = 20 * time.Second + DefaultTimeout = 120 * time.Second + + memcached.SetDefaultDialTimeout(DefaultDialTimeout) + + _VB_BULK_GET_CHANNELS = make([]chan *vbBulkGet, _NUM_CHANNELS) + + for i := 0; i < _NUM_CHANNELS; i++ { + channel := make(chan *vbBulkGet, 16*1024*_NUM_CHANNEL_WORKERS) + _VB_BULK_GET_CHANNELS[i] = channel + + for j := 0; j < _NUM_CHANNEL_WORKERS; j++ { + go vbBulkGetWorker(channel) + } + } +} + +func vbBulkGetWorker(ch chan *vbBulkGet) { + defer func() { + // Workers cannot panic and die + recover() + go vbBulkGetWorker(ch) + }() + + for vbg := range ch { + vbDoBulkGet(vbg) + } +} + +func vbDoBulkGet(vbg *vbBulkGet) { + defer vbg.wg.Done() + defer func() { + // Workers cannot panic and die + recover() + }() + vbg.b.doBulkGet(vbg.k, vbg.keys, vbg.reqDeadline, vbg.ch, vbg.ech, vbg.subPaths, vbg.groupError) +} + +var _ERR_CHAN_FULL = fmt.Errorf("Data request queue full, 
aborting query.") + +func (b *Bucket) processBulkGet(kdm map[uint16][]string, reqDeadline time.Time, + ch chan<- map[string]*gomemcached.MCResponse, ech chan<- error, subPaths []string, + eStatus *errorStatus) { + + defer close(ch) + defer close(ech) + + wg := &sync.WaitGroup{} + + for k, keys := range kdm { + + // GetBulk() group has error donot Queue items for this group + if eStatus.errStatus { + break + } + + vbg := &vbBulkGet{ + b: b, + ch: ch, + ech: ech, + k: k, + keys: keys, + reqDeadline: reqDeadline, + wg: wg, + subPaths: subPaths, + groupError: eStatus, + } + + wg.Add(1) + + // Random int + // Right shift to avoid 8-byte alignment, and take low bits + c := (uintptr(unsafe.Pointer(vbg)) >> 4) % _NUM_CHANNELS + + select { + case _VB_BULK_GET_CHANNELS[c] <- vbg: + // No-op + default: + // Buffer full, abandon the bulk get + ech <- _ERR_CHAN_FULL + wg.Add(-1) + } + } + + // Wait for my vb bulk gets + wg.Wait() +} + +type multiError []error + +func (m multiError) Error() string { + if len(m) == 0 { + panic("Error of none") + } + + return fmt.Sprintf("{%v errors, starting with %v}", len(m), m[0].Error()) +} + +// Convert a stream of errors from ech into a multiError (or nil) and +// send down eout. +// +// At least one send is guaranteed on eout, but two is possible, so +// buffer the out channel appropriately. +func errorCollector(ech <-chan error, eout chan<- error, eStatus *errorStatus) { + defer func() { eout <- nil }() + var errs multiError + for e := range ech { + if !eStatus.errStatus && !IsKeyNoEntError(e) { + eStatus.errStatus = true + } + + errs = append(errs, e) + } + + if len(errs) > 0 { + eout <- errs + } +} + +// Fetches multiple keys concurrently, with []byte values +// +// This is a wrapper around GetBulk which converts all values returned +// by GetBulk from raw memcached responses into []byte slices. 
+// Returns one document for duplicate keys +func (b *Bucket) GetBulkRaw(keys []string) (map[string][]byte, error) { + + resp, eout := b.getBulk(keys, noDeadline, nil) + + rv := make(map[string][]byte, len(keys)) + for k, av := range resp { + rv[k] = av.Body + } + + b.ReleaseGetBulkPools(resp) + return rv, eout + +} + +// GetBulk fetches multiple keys concurrently. +// +// Unlike more convenient GETs, the entire response is returned in the +// map array for each key. Keys that were not found will not be included in +// the map. + +func (b *Bucket) GetBulk(keys []string, reqDeadline time.Time, subPaths []string) (map[string]*gomemcached.MCResponse, error) { + return b.getBulk(keys, reqDeadline, subPaths) +} + +func (b *Bucket) ReleaseGetBulkPools(rv map[string]*gomemcached.MCResponse) { + _STRING_MCRESPONSE_POOL.Put(rv) +} + +func (b *Bucket) getBulk(keys []string, reqDeadline time.Time, subPaths []string) (map[string]*gomemcached.MCResponse, error) { + kdm := _VB_STRING_POOL.Get() + defer _VB_STRING_POOL.Put(kdm) + for _, k := range keys { + if k != "" { + vb := uint16(b.VBHash(k)) + a, ok1 := kdm[vb] + if !ok1 { + a = _STRING_POOL.Get() + } + kdm[vb] = append(a, k) + } + } + + eout := make(chan error, 2) + groupErrorStatus := &errorStatus{} + + // processBulkGet will own both of these channels and + // guarantee they're closed before it returns. + ch := make(chan map[string]*gomemcached.MCResponse) + ech := make(chan error) + + go errorCollector(ech, eout, groupErrorStatus) + go b.processBulkGet(kdm, reqDeadline, ch, ech, subPaths, groupErrorStatus) + + var rv map[string]*gomemcached.MCResponse + + for m := range ch { + if rv == nil { + rv = m + continue + } + + for k, v := range m { + rv[k] = v + } + _STRING_MCRESPONSE_POOL.Put(m) + } + + return rv, <-eout +} + +// WriteOptions is the set of option flags availble for the Write +// method. They are ORed together to specify the desired request. 
+type WriteOptions int + +const ( + // Raw specifies that the value is raw []byte or nil; don't + // JSON-encode it. + Raw = WriteOptions(1 << iota) + // AddOnly indicates an item should only be written if it + // doesn't exist, otherwise ErrKeyExists is returned. + AddOnly + // Persist causes the operation to block until the server + // confirms the item is persisted. + Persist + // Indexable causes the operation to block until it's availble via the index. + Indexable + // Append indicates the given value should be appended to the + // existing value for the given key. + Append +) + +var optNames = []struct { + opt WriteOptions + name string +}{ + {Raw, "raw"}, + {AddOnly, "addonly"}, {Persist, "persist"}, + {Indexable, "indexable"}, {Append, "append"}, +} + +// String representation of WriteOptions +func (w WriteOptions) String() string { + f := []string{} + for _, on := range optNames { + if w&on.opt != 0 { + f = append(f, on.name) + w &= ^on.opt + } + } + if len(f) == 0 || w != 0 { + f = append(f, fmt.Sprintf("0x%x", int(w))) + } + return strings.Join(f, "|") +} + +// Error returned from Write with AddOnly flag, when key already exists in the bucket. +var ErrKeyExists = errors.New("key exists") + +// General-purpose value setter. +// +// The Set, Add and Delete methods are just wrappers around this. The +// interpretation of `v` depends on whether the `Raw` option is +// given. If it is, v must be a byte array or nil. (A nil value causes +// a delete.) If `Raw` is not given, `v` will be marshaled as JSON +// before being written. It must be JSON-marshalable and it must not +// be nil. 
+func (b *Bucket) Write(k string, flags, exp int, v interface{}, + opt WriteOptions) (err error) { + + if ClientOpCallback != nil { + defer func(t time.Time) { + ClientOpCallback(fmt.Sprintf("Write(%v)", opt), k, t, err) + }(time.Now()) + } + + var data []byte + if opt&Raw == 0 { + data, err = json.Marshal(v) + if err != nil { + return err + } + } else if v != nil { + data = v.([]byte) + } + + var res *gomemcached.MCResponse + err = b.Do(k, func(mc *memcached.Client, vb uint16) error { + if opt&AddOnly != 0 { + res, err = memcached.UnwrapMemcachedError( + mc.Add(vb, k, flags, exp, data)) + if err == nil && res.Status != gomemcached.SUCCESS { + if res.Status == gomemcached.KEY_EEXISTS { + err = ErrKeyExists + } else { + err = res + } + } + } else if opt&Append != 0 { + res, err = mc.Append(vb, k, data) + } else if data == nil { + res, err = mc.Del(vb, k) + } else { + res, err = mc.Set(vb, k, flags, exp, data) + } + + return err + }) + + if err == nil && (opt&(Persist|Indexable) != 0) { + err = b.WaitForPersistence(k, res.Cas, data == nil) + } + + return err +} + +func (b *Bucket) WriteWithMT(k string, flags, exp int, v interface{}, + opt WriteOptions) (mt *MutationToken, err error) { + + if ClientOpCallback != nil { + defer func(t time.Time) { + ClientOpCallback(fmt.Sprintf("WriteWithMT(%v)", opt), k, t, err) + }(time.Now()) + } + + var data []byte + if opt&Raw == 0 { + data, err = json.Marshal(v) + if err != nil { + return nil, err + } + } else if v != nil { + data = v.([]byte) + } + + var res *gomemcached.MCResponse + err = b.Do(k, func(mc *memcached.Client, vb uint16) error { + if opt&AddOnly != 0 { + res, err = memcached.UnwrapMemcachedError( + mc.Add(vb, k, flags, exp, data)) + if err == nil && res.Status != gomemcached.SUCCESS { + if res.Status == gomemcached.KEY_EEXISTS { + err = ErrKeyExists + } else { + err = res + } + } + } else if opt&Append != 0 { + res, err = mc.Append(vb, k, data) + } else if data == nil { + res, err = mc.Del(vb, k) + } else { + res, 
err = mc.Set(vb, k, flags, exp, data) + } + + if len(res.Extras) >= 16 { + vbuuid := uint64(binary.BigEndian.Uint64(res.Extras[0:8])) + seqNo := uint64(binary.BigEndian.Uint64(res.Extras[8:16])) + mt = &MutationToken{VBid: vb, Guard: vbuuid, Value: seqNo} + } + + return err + }) + + if err == nil && (opt&(Persist|Indexable) != 0) { + err = b.WaitForPersistence(k, res.Cas, data == nil) + } + + return mt, err +} + +// Set a value in this bucket with Cas and return the new Cas value +func (b *Bucket) Cas(k string, exp int, cas uint64, v interface{}) (uint64, error) { + return b.WriteCas(k, 0, exp, cas, v, 0) +} + +// Set a value in this bucket with Cas without json encoding it +func (b *Bucket) CasRaw(k string, exp int, cas uint64, v interface{}) (uint64, error) { + return b.WriteCas(k, 0, exp, cas, v, Raw) +} + +func (b *Bucket) WriteCas(k string, flags, exp int, cas uint64, v interface{}, + opt WriteOptions) (newCas uint64, err error) { + + if ClientOpCallback != nil { + defer func(t time.Time) { + ClientOpCallback(fmt.Sprintf("Write(%v)", opt), k, t, err) + }(time.Now()) + } + + var data []byte + if opt&Raw == 0 { + data, err = json.Marshal(v) + if err != nil { + return 0, err + } + } else if v != nil { + data = v.([]byte) + } + + var res *gomemcached.MCResponse + err = b.Do(k, func(mc *memcached.Client, vb uint16) error { + res, err = mc.SetCas(vb, k, flags, exp, cas, data) + return err + }) + + if err == nil && (opt&(Persist|Indexable) != 0) { + err = b.WaitForPersistence(k, res.Cas, data == nil) + } + + return res.Cas, err +} + +// Extended CAS operation. 
These functions will return the mutation token, i.e vbuuid & guard +func (b *Bucket) CasWithMeta(k string, flags int, exp int, cas uint64, v interface{}) (uint64, *MutationToken, error) { + return b.WriteCasWithMT(k, flags, exp, cas, v, 0) +} + +func (b *Bucket) CasWithMetaRaw(k string, flags int, exp int, cas uint64, v interface{}) (uint64, *MutationToken, error) { + return b.WriteCasWithMT(k, flags, exp, cas, v, Raw) +} + +func (b *Bucket) WriteCasWithMT(k string, flags, exp int, cas uint64, v interface{}, + opt WriteOptions) (newCas uint64, mt *MutationToken, err error) { + + if ClientOpCallback != nil { + defer func(t time.Time) { + ClientOpCallback(fmt.Sprintf("Write(%v)", opt), k, t, err) + }(time.Now()) + } + + var data []byte + if opt&Raw == 0 { + data, err = json.Marshal(v) + if err != nil { + return 0, nil, err + } + } else if v != nil { + data = v.([]byte) + } + + var res *gomemcached.MCResponse + err = b.Do(k, func(mc *memcached.Client, vb uint16) error { + res, err = mc.SetCas(vb, k, flags, exp, cas, data) + return err + }) + + if err != nil { + return 0, nil, err + } + + // check for extras + if len(res.Extras) >= 16 { + vbuuid := uint64(binary.BigEndian.Uint64(res.Extras[0:8])) + seqNo := uint64(binary.BigEndian.Uint64(res.Extras[8:16])) + vb := b.VBHash(k) + mt = &MutationToken{VBid: uint16(vb), Guard: vbuuid, Value: seqNo} + } + + if err == nil && (opt&(Persist|Indexable) != 0) { + err = b.WaitForPersistence(k, res.Cas, data == nil) + } + + return res.Cas, mt, err +} + +// Set a value in this bucket. +// The value will be serialized into a JSON document. +func (b *Bucket) Set(k string, exp int, v interface{}) error { + return b.Write(k, 0, exp, v, 0) +} + +// Set a value in this bucket with with flags +func (b *Bucket) SetWithMeta(k string, flags int, exp int, v interface{}) (*MutationToken, error) { + return b.WriteWithMT(k, flags, exp, v, 0) +} + +// SetRaw sets a value in this bucket without JSON encoding it. 
+func (b *Bucket) SetRaw(k string, exp int, v []byte) error { + return b.Write(k, 0, exp, v, Raw) +} + +// Add adds a value to this bucket; like Set except that nothing +// happens if the key exists. The value will be serialized into a +// JSON document. +func (b *Bucket) Add(k string, exp int, v interface{}) (added bool, err error) { + err = b.Write(k, 0, exp, v, AddOnly) + if err == ErrKeyExists { + return false, nil + } + return (err == nil), err +} + +// AddRaw adds a value to this bucket; like SetRaw except that nothing +// happens if the key exists. The value will be stored as raw bytes. +func (b *Bucket) AddRaw(k string, exp int, v []byte) (added bool, err error) { + err = b.Write(k, 0, exp, v, AddOnly|Raw) + if err == ErrKeyExists { + return false, nil + } + return (err == nil), err +} + +// Add adds a value to this bucket; like Set except that nothing +// happens if the key exists. The value will be serialized into a +// JSON document. +func (b *Bucket) AddWithMT(k string, exp int, v interface{}) (added bool, mt *MutationToken, err error) { + mt, err = b.WriteWithMT(k, 0, exp, v, AddOnly) + if err == ErrKeyExists { + return false, mt, nil + } + return (err == nil), mt, err +} + +// AddRaw adds a value to this bucket; like SetRaw except that nothing +// happens if the key exists. The value will be stored as raw bytes. +func (b *Bucket) AddRawWithMT(k string, exp int, v []byte) (added bool, mt *MutationToken, err error) { + mt, err = b.WriteWithMT(k, 0, exp, v, AddOnly|Raw) + if err == ErrKeyExists { + return false, mt, nil + } + return (err == nil), mt, err +} + +// Append appends raw data to an existing item. 
+func (b *Bucket) Append(k string, data []byte) error { + return b.Write(k, 0, 0, data, Append|Raw) +} + +// Get a value straight from Memcached +func (b *Bucket) GetsMC(key string, reqDeadline time.Time) (*gomemcached.MCResponse, error) { + var err error + var response *gomemcached.MCResponse + + if key == "" { + return nil, nil + } + + if ClientOpCallback != nil { + defer func(t time.Time) { ClientOpCallback("GetsMC", key, t, err) }(time.Now()) + } + + err = b.Do2(key, func(mc *memcached.Client, vb uint16) error { + var err1 error + + mc.SetDeadline(getDeadline(reqDeadline, DefaultTimeout)) + response, err1 = mc.Get(vb, key) + mc.SetDeadline(noDeadline) + if err1 != nil { + return err1 + } + return nil + }, false) + return response, err +} + +// Get a value through the subdoc API +func (b *Bucket) GetsSubDoc(key string, reqDeadline time.Time, subPaths []string) (*gomemcached.MCResponse, error) { + var err error + var response *gomemcached.MCResponse + + if key == "" { + return nil, nil + } + + if ClientOpCallback != nil { + defer func(t time.Time) { ClientOpCallback("GetsSubDoc", key, t, err) }(time.Now()) + } + + err = b.Do2(key, func(mc *memcached.Client, vb uint16) error { + var err1 error + + mc.SetDeadline(getDeadline(reqDeadline, DefaultTimeout)) + response, err1 = mc.GetSubdoc(vb, key, subPaths) + mc.SetDeadline(noDeadline) + if err1 != nil { + return err1 + } + return nil + }, false) + return response, err +} + +// GetsRaw gets a raw value from this bucket including its CAS +// counter and flags. 
+func (b *Bucket) GetsRaw(k string) (data []byte, flags int, + cas uint64, err error) { + + if ClientOpCallback != nil { + defer func(t time.Time) { ClientOpCallback("GetsRaw", k, t, err) }(time.Now()) + } + + err = b.Do(k, func(mc *memcached.Client, vb uint16) error { + res, err := mc.Get(vb, k) + if err != nil { + return err + } + cas = res.Cas + if len(res.Extras) >= 4 { + flags = int(binary.BigEndian.Uint32(res.Extras)) + } + data = res.Body + return nil + }) + return +} + +// Gets gets a value from this bucket, including its CAS counter. The +// value is expected to be a JSON stream and will be deserialized into +// rv. +func (b *Bucket) Gets(k string, rv interface{}, caso *uint64) error { + data, _, cas, err := b.GetsRaw(k) + if err != nil { + return err + } + if caso != nil { + *caso = cas + } + return json.Unmarshal(data, rv) +} + +// Get a value from this bucket. +// The value is expected to be a JSON stream and will be deserialized +// into rv. +func (b *Bucket) Get(k string, rv interface{}) error { + return b.Gets(k, rv, nil) +} + +// GetRaw gets a raw value from this bucket. No marshaling is performed. +func (b *Bucket) GetRaw(k string) ([]byte, error) { + d, _, _, err := b.GetsRaw(k) + return d, err +} + +// GetAndTouchRaw gets a raw value from this bucket including its CAS +// counter and flags, and updates the expiry on the doc. 
+func (b *Bucket) GetAndTouchRaw(k string, exp int) (data []byte, + cas uint64, err error) { + + if ClientOpCallback != nil { + defer func(t time.Time) { ClientOpCallback("GetsRaw", k, t, err) }(time.Now()) + } + + err = b.Do(k, func(mc *memcached.Client, vb uint16) error { + res, err := mc.GetAndTouch(vb, k, exp) + if err != nil { + return err + } + cas = res.Cas + data = res.Body + return nil + }) + return data, cas, err +} + +// GetMeta returns the meta values for a key +func (b *Bucket) GetMeta(k string, flags *int, expiry *int, cas *uint64, seqNo *uint64) (err error) { + + if ClientOpCallback != nil { + defer func(t time.Time) { ClientOpCallback("GetsMeta", k, t, err) }(time.Now()) + } + + err = b.Do(k, func(mc *memcached.Client, vb uint16) error { + res, err := mc.GetMeta(vb, k) + if err != nil { + return err + } + + *cas = res.Cas + if len(res.Extras) >= 8 { + *flags = int(binary.BigEndian.Uint32(res.Extras[4:])) + } + + if len(res.Extras) >= 12 { + *expiry = int(binary.BigEndian.Uint32(res.Extras[8:])) + } + + if len(res.Extras) >= 20 { + *seqNo = uint64(binary.BigEndian.Uint64(res.Extras[12:])) + } + + return nil + }) + + return err +} + +// Delete a key from this bucket. +func (b *Bucket) Delete(k string) error { + return b.Write(k, 0, 0, nil, Raw) +} + +// Incr increments the value at a given key by amt and defaults to def if no value present. 
+func (b *Bucket) Incr(k string, amt, def uint64, exp int) (val uint64, err error) { + if ClientOpCallback != nil { + defer func(t time.Time) { ClientOpCallback("Incr", k, t, err) }(time.Now()) + } + + var rv uint64 + err = b.Do(k, func(mc *memcached.Client, vb uint16) error { + res, err := mc.Incr(vb, k, amt, def, exp) + if err != nil { + return err + } + rv = res + return nil + }) + return rv, err +} + +// Decr decrements the value at a given key by amt and defaults to def if no value present +func (b *Bucket) Decr(k string, amt, def uint64, exp int) (val uint64, err error) { + if ClientOpCallback != nil { + defer func(t time.Time) { ClientOpCallback("Decr", k, t, err) }(time.Now()) + } + + var rv uint64 + err = b.Do(k, func(mc *memcached.Client, vb uint16) error { + res, err := mc.Decr(vb, k, amt, def, exp) + if err != nil { + return err + } + rv = res + return nil + }) + return rv, err +} + +// Wrapper around memcached.CASNext() +func (b *Bucket) casNext(k string, exp int, state *memcached.CASState) bool { + if ClientOpCallback != nil { + defer func(t time.Time) { + ClientOpCallback("casNext", k, t, state.Err) + }(time.Now()) + } + + keepGoing := false + state.Err = b.Do(k, func(mc *memcached.Client, vb uint16) error { + keepGoing = mc.CASNext(vb, k, exp, state) + return state.Err + }) + return keepGoing && state.Err == nil +} + +// An UpdateFunc is a callback function to update a document +type UpdateFunc func(current []byte) (updated []byte, err error) + +// Return this as the error from an UpdateFunc to cancel the Update +// operation. +const UpdateCancel = memcached.CASQuit + +// Update performs a Safe update of a document, avoiding conflicts by +// using CAS. +// +// The callback function will be invoked with the current raw document +// contents (or nil if the document doesn't exist); it should return +// the updated raw contents (or nil to delete.) If it decides not to +// change anything it can return UpdateCancel as the error. 
+// +// If another writer modifies the document between the get and the +// set, the callback will be invoked again with the newer value. +func (b *Bucket) Update(k string, exp int, callback UpdateFunc) error { + _, err := b.update(k, exp, callback) + return err +} + +// internal version of Update that returns a CAS value +func (b *Bucket) update(k string, exp int, callback UpdateFunc) (newCas uint64, err error) { + var state memcached.CASState + for b.casNext(k, exp, &state) { + var err error + if state.Value, err = callback(state.Value); err != nil { + return 0, err + } + } + return state.Cas, state.Err +} + +// A WriteUpdateFunc is a callback function to update a document +type WriteUpdateFunc func(current []byte) (updated []byte, opt WriteOptions, err error) + +// WriteUpdate performs a Safe update of a document, avoiding +// conflicts by using CAS. WriteUpdate is like Update, except that +// the callback can return a set of WriteOptions, of which Persist and +// Indexable are recognized: these cause the call to wait until the +// document update has been persisted to disk and/or become available +// to index. +func (b *Bucket) WriteUpdate(k string, exp int, callback WriteUpdateFunc) error { + var writeOpts WriteOptions + var deletion bool + // Wrap the callback in an UpdateFunc we can pass to Update: + updateCallback := func(current []byte) (updated []byte, err error) { + update, opt, err := callback(current) + writeOpts = opt + deletion = (update == nil) + return update, err + } + cas, err := b.update(k, exp, updateCallback) + if err != nil { + return err + } + // If callback asked, wait for persistence or indexability: + if writeOpts&(Persist|Indexable) != 0 { + err = b.WaitForPersistence(k, cas, deletion) + } + return err +} + +// Observe observes the current state of a document. 
+func (b *Bucket) Observe(k string) (result memcached.ObserveResult, err error) { + if ClientOpCallback != nil { + defer func(t time.Time) { ClientOpCallback("Observe", k, t, err) }(time.Now()) + } + + err = b.Do(k, func(mc *memcached.Client, vb uint16) error { + result, err = mc.Observe(vb, k) + return err + }) + return +} + +// Returned from WaitForPersistence (or Write, if the Persistent or Indexable flag is used) +// if the value has been overwritten by another before being persisted. +var ErrOverwritten = errors.New("overwritten") + +// Returned from WaitForPersistence (or Write, if the Persistent or Indexable flag is used) +// if the value hasn't been persisted by the timeout interval +var ErrTimeout = errors.New("timeout") + +// WaitForPersistence waits for an item to be considered durable. +// +// Besides transport errors, ErrOverwritten may be returned if the +// item is overwritten before it reaches durability. ErrTimeout may +// occur if the item isn't found durable in a reasonable amount of +// time. 
+func (b *Bucket) WaitForPersistence(k string, cas uint64, deletion bool) error { + timeout := 10 * time.Second + sleepDelay := 5 * time.Millisecond + start := time.Now() + for { + time.Sleep(sleepDelay) + sleepDelay += sleepDelay / 2 // multiply delay by 1.5 every time + + result, err := b.Observe(k) + if err != nil { + return err + } + if persisted, overwritten := result.CheckPersistence(cas, deletion); overwritten { + return ErrOverwritten + } else if persisted { + return nil + } + + if result.PersistenceTime > 0 { + timeout = 2 * result.PersistenceTime + } + if time.Since(start) >= timeout-sleepDelay { + return ErrTimeout + } + } +} + +var _STRING_MCRESPONSE_POOL = gomemcached.NewStringMCResponsePool(16) + +type stringPool struct { + pool *sync.Pool + size int +} + +func newStringPool(size int) *stringPool { + rv := &stringPool{ + pool: &sync.Pool{ + New: func() interface{} { + return make([]string, 0, size) + }, + }, + size: size, + } + + return rv +} + +func (this *stringPool) Get() []string { + return this.pool.Get().([]string) +} + +func (this *stringPool) Put(s []string) { + if s == nil || cap(s) < this.size || cap(s) > 2*this.size { + return + } + + this.pool.Put(s[0:0]) +} + +var _STRING_POOL = newStringPool(16) + +type vbStringPool struct { + pool *sync.Pool + strPool *stringPool +} + +func newVBStringPool(size int, sp *stringPool) *vbStringPool { + rv := &vbStringPool{ + pool: &sync.Pool{ + New: func() interface{} { + return make(map[uint16][]string, size) + }, + }, + strPool: sp, + } + + return rv +} + +func (this *vbStringPool) Get() map[uint16][]string { + return this.pool.Get().(map[uint16][]string) +} + +func (this *vbStringPool) Put(s map[uint16][]string) { + if s == nil { + return + } + + for k, v := range s { + delete(s, k) + this.strPool.Put(v) + } + + this.pool.Put(s) +} + +var _VB_STRING_POOL = newVBStringPool(16, _STRING_POOL) diff --git a/vendor/github.com/couchbaselabs/go-couchbase/conn_pool.go 
b/vendor/github.com/couchbaselabs/go-couchbase/conn_pool.go new file mode 100644 index 0000000000..babd3adb6a --- /dev/null +++ b/vendor/github.com/couchbaselabs/go-couchbase/conn_pool.go @@ -0,0 +1,387 @@ +package couchbase + +import ( + "errors" + "sync/atomic" + "time" + + "github.com/couchbase/gomemcached" + "github.com/couchbase/gomemcached/client" + "github.com/couchbase/goutils/logging" +) + +// GenericMcdAuthHandler is a kind of AuthHandler that performs +// special auth exchange (like non-standard auth, possibly followed by +// select-bucket). +type GenericMcdAuthHandler interface { + AuthHandler + AuthenticateMemcachedConn(host string, conn *memcached.Client) error +} + +// Error raised when a connection can't be retrieved from a pool. +var TimeoutError = errors.New("timeout waiting to build connection") +var errClosedPool = errors.New("the connection pool is closed") +var errNoPool = errors.New("no connection pool") + +// Default timeout for retrieving a connection from the pool. +var ConnPoolTimeout = time.Hour * 24 * 30 + +// overflow connection closer cycle time +var ConnCloserInterval = time.Second * 30 + +// ConnPoolAvailWaitTime is the amount of time to wait for an existing +// connection from the pool before considering the creation of a new +// one. 
+var ConnPoolAvailWaitTime = time.Millisecond + +type connectionPool struct { + host string + mkConn func(host string, ah AuthHandler) (*memcached.Client, error) + auth AuthHandler + connections chan *memcached.Client + createsem chan bool + bailOut chan bool + poolSize int + connCount uint64 + inUse bool +} + +func newConnectionPool(host string, ah AuthHandler, closer bool, poolSize, poolOverflow int) *connectionPool { + connSize := poolSize + if closer { + connSize += poolOverflow + } + rv := &connectionPool{ + host: host, + connections: make(chan *memcached.Client, connSize), + createsem: make(chan bool, poolSize+poolOverflow), + mkConn: defaultMkConn, + auth: ah, + poolSize: poolSize, + } + if closer { + rv.bailOut = make(chan bool, 1) + go rv.connCloser() + } + return rv +} + +// ConnPoolTimeout is notified whenever connections are acquired from a pool. +var ConnPoolCallback func(host string, source string, start time.Time, err error) + +func defaultMkConn(host string, ah AuthHandler) (*memcached.Client, error) { + var features memcached.Features + + conn, err := memcached.Connect("tcp", host) + if err != nil { + return nil, err + } + + if TCPKeepalive == true { + conn.SetKeepAliveOptions(time.Duration(TCPKeepaliveInterval) * time.Second) + } + + if EnableMutationToken == true { + features = append(features, memcached.FeatureMutationToken) + } + if EnableDataType == true { + features = append(features, memcached.FeatureDataType) + } + + if EnableXattr == true { + features = append(features, memcached.FeatureXattr) + } + + if len(features) > 0 { + if DefaultTimeout > 0 { + conn.SetDeadline(getDeadline(noDeadline, DefaultTimeout)) + } + + res, err := conn.EnableFeatures(features) + + if DefaultTimeout > 0 { + conn.SetDeadline(noDeadline) + } + + if err != nil && isTimeoutError(err) { + conn.Close() + return nil, err + } + + if err != nil || res.Status != gomemcached.SUCCESS { + logging.Warnf("Unable to enable features %v", err) + } + } + + if gah, ok := 
ah.(GenericMcdAuthHandler); ok { + err = gah.AuthenticateMemcachedConn(host, conn) + if err != nil { + conn.Close() + return nil, err + } + return conn, nil + } + name, pass, bucket := ah.GetCredentials() + if name != "default" { + _, err = conn.Auth(name, pass) + if err != nil { + conn.Close() + return nil, err + } + // Select bucket (Required for cb_auth creds) + // Required when doing auth with _admin credentials + if bucket != "" && bucket != name { + _, err = conn.SelectBucket(bucket) + if err != nil { + conn.Close() + return nil, err + } + } + } + return conn, nil +} + +func (cp *connectionPool) Close() (err error) { + defer func() { + if recover() != nil { + err = errors.New("connectionPool.Close error") + } + }() + if cp.bailOut != nil { + + // defensively, we won't wait if the channel is full + select { + case cp.bailOut <- false: + default: + } + } + close(cp.connections) + for c := range cp.connections { + c.Close() + } + return +} + +func (cp *connectionPool) Node() string { + return cp.host +} + +func (cp *connectionPool) GetWithTimeout(d time.Duration) (rv *memcached.Client, err error) { + if cp == nil { + return nil, errNoPool + } + + path := "" + + if ConnPoolCallback != nil { + defer func(path *string, start time.Time) { + ConnPoolCallback(cp.host, *path, start, err) + }(&path, time.Now()) + } + + path = "short-circuit" + + // short-circuit available connetions. + select { + case rv, isopen := <-cp.connections: + if !isopen { + return nil, errClosedPool + } + atomic.AddUint64(&cp.connCount, 1) + return rv, nil + default: + } + + t := time.NewTimer(ConnPoolAvailWaitTime) + defer t.Stop() + + // Try to grab an available connection within 1ms + select { + case rv, isopen := <-cp.connections: + path = "avail1" + if !isopen { + return nil, errClosedPool + } + atomic.AddUint64(&cp.connCount, 1) + return rv, nil + case <-t.C: + // No connection came around in time, let's see + // whether we can get one or build a new one first. 
+ t.Reset(d) // Reuse the timer for the full timeout. + select { + case rv, isopen := <-cp.connections: + path = "avail2" + if !isopen { + return nil, errClosedPool + } + atomic.AddUint64(&cp.connCount, 1) + return rv, nil + case cp.createsem <- true: + path = "create" + // Build a connection if we can't get a real one. + // This can potentially be an overflow connection, or + // a pooled connection. + rv, err := cp.mkConn(cp.host, cp.auth) + if err != nil { + // On error, release our create hold + <-cp.createsem + } else { + atomic.AddUint64(&cp.connCount, 1) + } + return rv, err + case <-t.C: + return nil, ErrTimeout + } + } +} + +func (cp *connectionPool) Get() (*memcached.Client, error) { + return cp.GetWithTimeout(ConnPoolTimeout) +} + +func (cp *connectionPool) Return(c *memcached.Client) { + if c == nil { + return + } + + if cp == nil { + c.Close() + } + + if c.IsHealthy() { + defer func() { + if recover() != nil { + // This happens when the pool has already been + // closed and we're trying to return a + // connection to it anyway. Just close the + // connection. + c.Close() + } + }() + + select { + case cp.connections <- c: + default: + <-cp.createsem + c.Close() + } + } else { + <-cp.createsem + c.Close() + } +} + +// give the ability to discard a connection from a pool +// useful for ditching connections to the wrong node after a rebalance +func (cp *connectionPool) Discard(c *memcached.Client) { + <-cp.createsem + c.Close() +} + +// asynchronous connection closer +func (cp *connectionPool) connCloser() { + var connCount uint64 + + t := time.NewTimer(ConnCloserInterval) + defer t.Stop() + + for { + connCount = cp.connCount + + // we don't exist anymore! bail out! 
+ select { + case <-cp.bailOut: + return + case <-t.C: + } + t.Reset(ConnCloserInterval) + + // no overflow connections open or sustained requests for connections + // nothing to do until the next cycle + if len(cp.connections) <= cp.poolSize || + ConnCloserInterval/ConnPoolAvailWaitTime < time.Duration(cp.connCount-connCount) { + continue + } + + // close overflow connections now that they are not needed + for c := range cp.connections { + select { + case <-cp.bailOut: + return + default: + } + + // bail out if close did not work out + if !cp.connCleanup(c) { + return + } + if len(cp.connections) <= cp.poolSize { + break + } + } + } +} + +// close connection with recovery on error +func (cp *connectionPool) connCleanup(c *memcached.Client) (rv bool) { + + // just in case we are closing a connection after + // bailOut has been sent but we haven't yet read it + defer func() { + if recover() != nil { + rv = false + } + }() + rv = true + + c.Close() + <-cp.createsem + return +} + +func (cp *connectionPool) StartTapFeed(args *memcached.TapArguments) (*memcached.TapFeed, error) { + if cp == nil { + return nil, errNoPool + } + mc, err := cp.Get() + if err != nil { + return nil, err + } + + // A connection can't be used after TAP; Dont' count it against the + // connection pool capacity + <-cp.createsem + + return mc.StartTapFeed(*args) +} + +const DEFAULT_WINDOW_SIZE = 20 * 1024 * 1024 // 20 Mb + +func (cp *connectionPool) StartUprFeed(name string, sequence uint32, dcp_buffer_size uint32, data_chan_size int) (*memcached.UprFeed, error) { + if cp == nil { + return nil, errNoPool + } + mc, err := cp.Get() + if err != nil { + return nil, err + } + + // A connection can't be used after it has been allocated to UPR; + // Dont' count it against the connection pool capacity + <-cp.createsem + + uf, err := mc.NewUprFeed() + if err != nil { + return nil, err + } + + if err := uf.UprOpen(name, sequence, dcp_buffer_size); err != nil { + return nil, err + } + + if err := 
uf.StartFeedWithConfig(data_chan_size); err != nil { + return nil, err + } + + return uf, nil +} diff --git a/vendor/github.com/couchbaselabs/go-couchbase/ddocs.go b/vendor/github.com/couchbaselabs/go-couchbase/ddocs.go new file mode 100644 index 0000000000..f9cc343aa8 --- /dev/null +++ b/vendor/github.com/couchbaselabs/go-couchbase/ddocs.go @@ -0,0 +1,288 @@ +package couchbase + +import ( + "bytes" + "encoding/json" + "fmt" + "github.com/couchbase/goutils/logging" + "io/ioutil" + "net/http" +) + +// ViewDefinition represents a single view within a design document. +type ViewDefinition struct { + Map string `json:"map"` + Reduce string `json:"reduce,omitempty"` +} + +// DDoc is the document body of a design document specifying a view. +type DDoc struct { + Language string `json:"language,omitempty"` + Views map[string]ViewDefinition `json:"views"` +} + +// DDocsResult represents the result from listing the design +// documents. +type DDocsResult struct { + Rows []struct { + DDoc struct { + Meta map[string]interface{} + JSON DDoc + } `json:"doc"` + } `json:"rows"` +} + +// GetDDocs lists all design documents +func (b *Bucket) GetDDocs() (DDocsResult, error) { + var ddocsResult DDocsResult + b.RLock() + pool := b.pool + uri := b.DDocs.URI + b.RUnlock() + + // MB-23555 ephemeral buckets have no ddocs + if uri == "" { + return DDocsResult{}, nil + } + + err := pool.client.parseURLResponse(uri, &ddocsResult) + if err != nil { + return DDocsResult{}, err + } + return ddocsResult, nil +} + +func (b *Bucket) GetDDocWithRetry(docname string, into interface{}) error { + ddocURI := fmt.Sprintf("/%s/_design/%s", b.GetName(), docname) + err := b.parseAPIResponse(ddocURI, &into) + if err != nil { + return err + } + return nil +} + +func (b *Bucket) GetDDocsWithRetry() (DDocsResult, error) { + var ddocsResult DDocsResult + b.RLock() + uri := b.DDocs.URI + b.RUnlock() + + // MB-23555 ephemeral buckets have no ddocs + if uri == "" { + return DDocsResult{}, nil + } + + err := 
b.parseURLResponse(uri, &ddocsResult) + if err != nil { + return DDocsResult{}, err + } + return ddocsResult, nil +} + +func (b *Bucket) ddocURL(docname string) (string, error) { + u, err := b.randomBaseURL() + if err != nil { + return "", err + } + u.Path = fmt.Sprintf("/%s/_design/%s", b.GetName(), docname) + return u.String(), nil +} + +func (b *Bucket) ddocURLNext(nodeId int, docname string) (string, int, error) { + u, selected, err := b.randomNextURL(nodeId) + if err != nil { + return "", -1, err + } + u.Path = fmt.Sprintf("/%s/_design/%s", b.GetName(), docname) + return u.String(), selected, nil +} + +const ABS_MAX_RETRIES = 10 +const ABS_MIN_RETRIES = 3 + +func (b *Bucket) getMaxRetries() (int, error) { + + maxRetries := len(b.Nodes()) + + if maxRetries == 0 { + return 0, fmt.Errorf("No available Couch rest URLs") + } + + if maxRetries > ABS_MAX_RETRIES { + maxRetries = ABS_MAX_RETRIES + } else if maxRetries < ABS_MIN_RETRIES { + maxRetries = ABS_MIN_RETRIES + } + + return maxRetries, nil +} + +// PutDDoc installs a design document. 
+func (b *Bucket) PutDDoc(docname string, value interface{}) error { + + var Err error + + maxRetries, err := b.getMaxRetries() + if err != nil { + return err + } + + lastNode := START_NODE_ID + + for retryCount := 0; retryCount < maxRetries; retryCount++ { + + Err = nil + + ddocU, selectedNode, err := b.ddocURLNext(lastNode, docname) + if err != nil { + return err + } + + lastNode = selectedNode + + logging.Infof(" Trying with selected node %d", selectedNode) + j, err := json.Marshal(value) + if err != nil { + return err + } + + req, err := http.NewRequest("PUT", ddocU, bytes.NewReader(j)) + if err != nil { + return err + } + req.Header.Set("Content-Type", "application/json") + err = maybeAddAuth(req, b.authHandler(false /* bucket not yet locked */)) + if err != nil { + return err + } + + res, err := doHTTPRequest(req) + if err != nil { + return err + } + + if res.StatusCode != 201 { + body, _ := ioutil.ReadAll(res.Body) + Err = fmt.Errorf("error installing view: %v / %s", + res.Status, body) + logging.Errorf(" Error in PutDDOC %v. Retrying...", Err) + res.Body.Close() + b.Refresh() + continue + } + + res.Body.Close() + break + } + + return Err +} + +// GetDDoc retrieves a specific a design doc. 
+func (b *Bucket) GetDDoc(docname string, into interface{}) error { + var Err error + var res *http.Response + + maxRetries, err := b.getMaxRetries() + if err != nil { + return err + } + + lastNode := START_NODE_ID + for retryCount := 0; retryCount < maxRetries; retryCount++ { + + Err = nil + ddocU, selectedNode, err := b.ddocURLNext(lastNode, docname) + if err != nil { + return err + } + + lastNode = selectedNode + logging.Infof(" Trying with selected node %d", selectedNode) + + req, err := http.NewRequest("GET", ddocU, nil) + if err != nil { + return err + } + req.Header.Set("Content-Type", "application/json") + err = maybeAddAuth(req, b.authHandler(false /* bucket not yet locked */)) + if err != nil { + return err + } + + res, err = doHTTPRequest(req) + if err != nil { + return err + } + if res.StatusCode != 200 { + body, _ := ioutil.ReadAll(res.Body) + Err = fmt.Errorf("error reading view: %v / %s", + res.Status, body) + logging.Errorf(" Error in GetDDOC %v Retrying...", Err) + b.Refresh() + res.Body.Close() + continue + } + defer res.Body.Close() + break + } + + if Err != nil { + return Err + } + + d := json.NewDecoder(res.Body) + return d.Decode(into) +} + +// DeleteDDoc removes a design document. 
+func (b *Bucket) DeleteDDoc(docname string) error { + + var Err error + + maxRetries, err := b.getMaxRetries() + if err != nil { + return err + } + + lastNode := START_NODE_ID + + for retryCount := 0; retryCount < maxRetries; retryCount++ { + + Err = nil + ddocU, selectedNode, err := b.ddocURLNext(lastNode, docname) + if err != nil { + return err + } + + lastNode = selectedNode + logging.Infof(" Trying with selected node %d", selectedNode) + + req, err := http.NewRequest("DELETE", ddocU, nil) + if err != nil { + return err + } + req.Header.Set("Content-Type", "application/json") + err = maybeAddAuth(req, b.authHandler(false /* bucket not already locked */)) + if err != nil { + return err + } + + res, err := doHTTPRequest(req) + if err != nil { + return err + } + if res.StatusCode != 200 { + body, _ := ioutil.ReadAll(res.Body) + Err = fmt.Errorf("error deleting view : %v / %s", res.Status, body) + logging.Errorf(" Error in DeleteDDOC %v. Retrying ... ", Err) + b.Refresh() + res.Body.Close() + continue + } + + res.Body.Close() + break + } + return Err +} diff --git a/vendor/github.com/couchbaselabs/go-couchbase/observe.go b/vendor/github.com/couchbaselabs/go-couchbase/observe.go new file mode 100644 index 0000000000..6e746f5a16 --- /dev/null +++ b/vendor/github.com/couchbaselabs/go-couchbase/observe.go @@ -0,0 +1,300 @@ +package couchbase + +import ( + "fmt" + "github.com/couchbase/goutils/logging" + "sync" +) + +type PersistTo uint8 + +const ( + PersistNone = PersistTo(0x00) + PersistMaster = PersistTo(0x01) + PersistOne = PersistTo(0x02) + PersistTwo = PersistTo(0x03) + PersistThree = PersistTo(0x04) + PersistFour = PersistTo(0x05) +) + +type ObserveTo uint8 + +const ( + ObserveNone = ObserveTo(0x00) + ObserveReplicateOne = ObserveTo(0x01) + ObserveReplicateTwo = ObserveTo(0x02) + ObserveReplicateThree = ObserveTo(0x03) + ObserveReplicateFour = ObserveTo(0x04) +) + +type JobType uint8 + +const ( + OBSERVE = JobType(0x00) + PERSIST = JobType(0x01) +) + +type 
ObservePersistJob struct { + vb uint16 + vbuuid uint64 + hostname string + jobType JobType + failover uint8 + lastPersistedSeqNo uint64 + currentSeqNo uint64 + resultChan chan *ObservePersistJob + errorChan chan *OPErrResponse +} + +type OPErrResponse struct { + vb uint16 + vbuuid uint64 + err error + job *ObservePersistJob +} + +var ObservePersistPool = NewPool(1024) +var OPJobChan = make(chan *ObservePersistJob, 1024) +var OPJobDone = make(chan bool) + +var wg sync.WaitGroup + +func (b *Bucket) StartOPPollers(maxWorkers int) { + + for i := 0; i < maxWorkers; i++ { + go b.OPJobPoll() + wg.Add(1) + } + wg.Wait() +} + +func (b *Bucket) SetObserveAndPersist(nPersist PersistTo, nObserve ObserveTo) (err error) { + + numNodes := len(b.Nodes()) + if int(nPersist) > numNodes || int(nObserve) > numNodes { + return fmt.Errorf("Not enough healthy nodes in the cluster") + } + + if int(nPersist) > (b.Replicas+1) || int(nObserve) > b.Replicas { + return fmt.Errorf("Not enough replicas in the cluster") + } + + if EnableMutationToken == false { + return fmt.Errorf("Mutation Tokens not enabled ") + } + + b.ds = &DurablitySettings{Persist: PersistTo(nPersist), Observe: ObserveTo(nObserve)} + return +} + +func (b *Bucket) ObserveAndPersistPoll(vb uint16, vbuuid uint64, seqNo uint64) (err error, failover bool) { + b.RLock() + ds := b.ds + b.RUnlock() + + if ds == nil { + return + } + + nj := 0 // total number of jobs + resultChan := make(chan *ObservePersistJob, 10) + errChan := make(chan *OPErrResponse, 10) + + nodes := b.GetNodeList(vb) + if int(ds.Observe) > len(nodes) || int(ds.Persist) > len(nodes) { + return fmt.Errorf("Not enough healthy nodes in the cluster"), false + } + + logging.Infof("Node list %v", nodes) + + if ds.Observe >= ObserveReplicateOne { + // create a job for each host + for i := ObserveReplicateOne; i < ds.Observe+1; i++ { + opJob := ObservePersistPool.Get() + opJob.vb = vb + opJob.vbuuid = vbuuid + opJob.jobType = OBSERVE + opJob.hostname = nodes[i] + 
opJob.resultChan = resultChan + opJob.errorChan = errChan + + OPJobChan <- opJob + nj++ + + } + } + + if ds.Persist >= PersistMaster { + for i := PersistMaster; i < ds.Persist+1; i++ { + opJob := ObservePersistPool.Get() + opJob.vb = vb + opJob.vbuuid = vbuuid + opJob.jobType = PERSIST + opJob.hostname = nodes[i] + opJob.resultChan = resultChan + opJob.errorChan = errChan + + OPJobChan <- opJob + nj++ + + } + } + + ok := true + for ok { + select { + case res := <-resultChan: + jobDone := false + if res.failover == 0 { + // no failover + if res.jobType == PERSIST { + if res.lastPersistedSeqNo >= seqNo { + jobDone = true + } + + } else { + if res.currentSeqNo >= seqNo { + jobDone = true + } + } + + if jobDone == true { + nj-- + ObservePersistPool.Put(res) + } else { + // requeue this job + OPJobChan <- res + } + + } else { + // Not currently handling failover scenarios TODO + nj-- + ObservePersistPool.Put(res) + failover = true + } + + if nj == 0 { + // done with all the jobs + ok = false + close(resultChan) + close(errChan) + } + + case Err := <-errChan: + logging.Errorf("Error in Observe/Persist %v", Err.err) + err = fmt.Errorf("Error in Observe/Persist job %v", Err.err) + nj-- + ObservePersistPool.Put(Err.job) + if nj == 0 { + close(resultChan) + close(errChan) + ok = false + } + } + } + + return +} + +func (b *Bucket) OPJobPoll() { + + ok := true + for ok == true { + select { + case job := <-OPJobChan: + pool := b.getConnPoolByHost(job.hostname, false /* bucket not already locked */) + if pool == nil { + errRes := &OPErrResponse{vb: job.vb, vbuuid: job.vbuuid} + errRes.err = fmt.Errorf("Pool not found for host %v", job.hostname) + errRes.job = job + job.errorChan <- errRes + continue + } + conn, err := pool.Get() + if err != nil { + errRes := &OPErrResponse{vb: job.vb, vbuuid: job.vbuuid} + errRes.err = fmt.Errorf("Unable to get connection from pool %v", err) + errRes.job = job + job.errorChan <- errRes + continue + } + + res, err := conn.ObserveSeq(job.vb, 
job.vbuuid) + if err != nil { + errRes := &OPErrResponse{vb: job.vb, vbuuid: job.vbuuid} + errRes.err = fmt.Errorf("Command failed %v", err) + errRes.job = job + job.errorChan <- errRes + continue + + } + pool.Return(conn) + job.lastPersistedSeqNo = res.LastPersistedSeqNo + job.currentSeqNo = res.CurrentSeqNo + job.failover = res.Failover + + job.resultChan <- job + case <-OPJobDone: + logging.Infof("Observe Persist Poller exitting") + ok = false + } + } + wg.Done() +} + +func (b *Bucket) GetNodeList(vb uint16) []string { + + vbm := b.VBServerMap() + if len(vbm.VBucketMap) < int(vb) { + logging.Infof("vbmap smaller than vblist") + return nil + } + + nodes := make([]string, len(vbm.VBucketMap[vb])) + for i := 0; i < len(vbm.VBucketMap[vb]); i++ { + n := vbm.VBucketMap[vb][i] + if n < 0 { + continue + } + + node := b.getMasterNode(n) + if len(node) > 1 { + nodes[i] = node + } + continue + + } + return nodes +} + +//pool of ObservePersist Jobs +type OPpool struct { + pool chan *ObservePersistJob +} + +// NewPool creates a new pool of jobs +func NewPool(max int) *OPpool { + return &OPpool{ + pool: make(chan *ObservePersistJob, max), + } +} + +// Borrow a Client from the pool. +func (p *OPpool) Get() *ObservePersistJob { + var o *ObservePersistJob + select { + case o = <-p.pool: + default: + o = &ObservePersistJob{} + } + return o +} + +// Return returns a Client to the pool. +func (p *OPpool) Put(o *ObservePersistJob) { + select { + case p.pool <- o: + default: + // let it go, let it go... 
+ } +} diff --git a/vendor/github.com/couchbaselabs/go-couchbase/pools.go b/vendor/github.com/couchbaselabs/go-couchbase/pools.go new file mode 100644 index 0000000000..5f3ff8c495 --- /dev/null +++ b/vendor/github.com/couchbaselabs/go-couchbase/pools.go @@ -0,0 +1,1282 @@ +package couchbase + +import ( + "bufio" + "bytes" + "crypto/tls" + "crypto/x509" + "encoding/base64" + "encoding/json" + "errors" + "fmt" + "io" + "io/ioutil" + "math/rand" + "net/http" + "net/url" + "runtime" + "sort" + "strings" + "sync" + "unsafe" + + "github.com/couchbase/goutils/logging" + + "github.com/couchbase/gomemcached" // package name is 'gomemcached' + "github.com/couchbase/gomemcached/client" // package name is 'memcached' +) + +// HTTPClient to use for REST and view operations. +var MaxIdleConnsPerHost = 256 +var HTTPTransport = &http.Transport{MaxIdleConnsPerHost: MaxIdleConnsPerHost} +var HTTPClient = &http.Client{Transport: HTTPTransport} + +// PoolSize is the size of each connection pool (per host). +var PoolSize = 64 + +// PoolOverflow is the number of overflow connections allowed in a +// pool. +var PoolOverflow = 16 + +// AsynchronousCloser turns on asynchronous closing for overflow connections +var AsynchronousCloser = false + +// TCP KeepAlive enabled/disabled +var TCPKeepalive = false + +// Enable MutationToken +var EnableMutationToken = false + +// Enable Data Type response +var EnableDataType = false + +// Enable Xattr +var EnableXattr = false + +// TCP keepalive interval in seconds. Default 30 minutes +var TCPKeepaliveInterval = 30 * 60 + +// Used to decide whether to skip verification of certificates when +// connecting to an ssl port. 
+var skipVerify = true +var certFile = "" +var keyFile = "" +var rootFile = "" + +func SetSkipVerify(skip bool) { + skipVerify = skip +} + +func SetCertFile(cert string) { + certFile = cert +} + +func SetKeyFile(cert string) { + keyFile = cert +} + +func SetRootFile(cert string) { + rootFile = cert +} + +// Allow applications to speciify the Poolsize and Overflow +func SetConnectionPoolParams(size, overflow int) { + + if size > 0 { + PoolSize = size + } + + if overflow > 0 { + PoolOverflow = overflow + } +} + +// Turn off overflow connections +func DisableOverflowConnections() { + PoolOverflow = 0 +} + +// Toggle asynchronous overflow closer +func EnableAsynchronousCloser(closer bool) { + AsynchronousCloser = closer +} + +// Allow TCP keepalive parameters to be set by the application +func SetTcpKeepalive(enabled bool, interval int) { + + TCPKeepalive = enabled + + if interval > 0 { + TCPKeepaliveInterval = interval + } +} + +// AuthHandler is a callback that gets the auth username and password +// for the given bucket. +type AuthHandler interface { + GetCredentials() (string, string, string) +} + +// AuthHandler is a callback that gets the auth username and password +// for the given bucket and sasl for memcached. +type AuthWithSaslHandler interface { + AuthHandler + GetSaslCredentials() (string, string) +} + +// MultiBucketAuthHandler is kind of AuthHandler that may perform +// different auth for different buckets. +type MultiBucketAuthHandler interface { + AuthHandler + ForBucket(bucket string) AuthHandler +} + +// HTTPAuthHandler is kind of AuthHandler that performs more general +// for outgoing http requests than is possible via simple +// GetCredentials() call (i.e. digest auth or different auth per +// different destinations). +type HTTPAuthHandler interface { + AuthHandler + SetCredsForRequest(req *http.Request) error +} + +// RestPool represents a single pool returned from the pools REST API. 
type RestPool struct {
	Name         string `json:"name"`
	StreamingURI string `json:"streamingUri"`
	URI          string `json:"uri"`
}

// Pools represents the collection of pools as returned from the REST API.
type Pools struct {
	ComponentsVersion     map[string]string `json:"componentsVersion,omitempty"`
	ImplementationVersion string            `json:"implementationVersion"`
	IsAdmin               bool              `json:"isAdminCreds"`
	UUID                  string            `json:"uuid"`
	Pools                 []RestPool        `json:"pools"`
}

// A Node is a computer in a cluster running the couchbase software.
type Node struct {
	ClusterCompatibility int                `json:"clusterCompatibility"`
	ClusterMembership    string             `json:"clusterMembership"`
	CouchAPIBase         string             `json:"couchApiBase"`
	Hostname             string             `json:"hostname"`
	InterestingStats     map[string]float64 `json:"interestingStats,omitempty"`
	MCDMemoryAllocated   float64            `json:"mcdMemoryAllocated"`
	MCDMemoryReserved    float64            `json:"mcdMemoryReserved"`
	MemoryFree           float64            `json:"memoryFree"`
	MemoryTotal          float64            `json:"memoryTotal"`
	OS                   string             `json:"os"`
	Ports                map[string]int     `json:"ports"`
	Services             []string           `json:"services"`
	Status               string             `json:"status"`
	Uptime               int                `json:"uptime,string"` // transmitted as a JSON string
	Version              string             `json:"version"`
	ThisNode             bool               `json:"thisNode,omitempty"`
}

// A Pool of nodes and buckets.
type Pool struct {
	BucketMap map[string]Bucket
	Nodes     []Node

	BucketURL map[string]string `json:"buckets"`

	client Client
}

// VBucketServerMap is a mapping of vbuckets to nodes. Each VBucketMap
// entry holds indexes into ServerList; the first index is treated as the
// master for that vbucket by the code in this file.
type VBucketServerMap struct {
	HashAlgorithm string   `json:"hashAlgorithm"`
	NumReplicas   int      `json:"numReplicas"`
	ServerList    []string `json:"serverList"`
	VBucketMap    [][]int  `json:"vBucketMap"`
}

// DurablitySettings holds the persist/observe settings of a bucket.
type DurablitySettings struct {
	Persist PersistTo
	Observe ObserveTo
}

// Bucket is the primary entry point for most data operations.
// Bucket is a locked data structure. All access to its fields should be done using read or write locking,
// as appropriate.
//
// Some access methods require locking, but rely on the caller to do so. These are appropriate
// for calls from methods that have already locked the structure. Methods like this
// take a boolean parameter "bucketLocked".
type Bucket struct {
	sync.RWMutex
	AuthType               string             `json:"authType"`
	Capabilities           []string           `json:"bucketCapabilities"`
	CapabilitiesVersion    string             `json:"bucketCapabilitiesVer"`
	Type                   string             `json:"bucketType"`
	Name                   string             `json:"name"`
	NodeLocator            string             `json:"nodeLocator"`
	Quota                  map[string]float64 `json:"quota,omitempty"`
	Replicas               int                `json:"replicaNumber"`
	Password               string             `json:"saslPassword"`
	URI                    string             `json:"uri"`
	StreamingURI           string             `json:"streamingUri"`
	LocalRandomKeyURI      string             `json:"localRandomKeyUri,omitempty"`
	UUID                   string             `json:"uuid"`
	ConflictResolutionType string             `json:"conflictResolutionType,omitempty"`
	DDocs                  struct {
		URI string `json:"uri"`
	} `json:"ddocs,omitempty"`
	BasicStats  map[string]interface{} `json:"basicStats,omitempty"`
	Controllers map[string]interface{} `json:"controllers,omitempty"`

	// These are used for JSON IO, but are not used for processing
	// since the live copies need to be swapped out safely (see the
	// unsafe.Pointer fields below).
	VBSMJson  VBucketServerMap `json:"vBucketServerMap"`
	NodesJSON []Node           `json:"nodes"`

	pool             *Pool
	connPools        unsafe.Pointer // *[]*connectionPool
	vBucketServerMap unsafe.Pointer // *VBucketServerMap
	nodeList         unsafe.Pointer // *[]Node
	commonSufix      string
	ah               AuthHandler        // auth handler
	ds               *DurablitySettings // Durablity Settings for this bucket
	Scopes           Scopes
}

// PoolServices is all the bucket-independent services in a pool
type PoolServices struct {
	Rev      int            `json:"rev"`
	NodesExt []NodeServices `json:"nodesExt"`
}

// NodeServices is all the bucket-independent services running on
// a node (given by Hostname)
type NodeServices struct {
	Services map[string]int `json:"services,omitempty"`
	Hostname string         `json:"hostname"`
	ThisNode bool           `json:"thisNode"`
}

// BucketNotFoundError is returned when a bucket name cannot be found.
type BucketNotFoundError struct {
	bucket string
}

// Error implements the error interface.
func (e *BucketNotFoundError) Error() string {
	return fmt.Sprint("No bucket named " + e.bucket)
}

// BucketAuth is an AuthHandler carrying a user name, a password and a
// bucket name.
type BucketAuth struct {
	name    string
	saslPwd string
	bucket  string
}

// newBucketAuth builds a BucketAuth from the given name, password and bucket.
func newBucketAuth(name string, pass string, bucket string) *BucketAuth {
	return &BucketAuth{name: name, saslPwd: pass, bucket: bucket}
}

// GetCredentials returns, in order, the name, the password and the bucket.
func (ba *BucketAuth) GetCredentials() (string, string, string) {
	return ba.name, ba.saslPwd, ba.bucket
}

// VBServerMap returns the current VBucketServerMap.
+func (b *Bucket) VBServerMap() *VBucketServerMap { + b.RLock() + defer b.RUnlock() + ret := (*VBucketServerMap)(b.vBucketServerMap) + return ret +} + +func (b *Bucket) GetVBmap(addrs []string) (map[string][]uint16, error) { + vbmap := b.VBServerMap() + servers := vbmap.ServerList + if addrs == nil { + addrs = vbmap.ServerList + } + + m := make(map[string][]uint16) + for _, addr := range addrs { + m[addr] = make([]uint16, 0) + } + for vbno, idxs := range vbmap.VBucketMap { + if len(idxs) == 0 { + return nil, fmt.Errorf("vbmap: No KV node no for vb %d", vbno) + } else if idxs[0] < 0 || idxs[0] >= len(servers) { + return nil, fmt.Errorf("vbmap: Invalid KV node no %d for vb %d", idxs[0], vbno) + } + addr := servers[idxs[0]] + if _, ok := m[addr]; ok { + m[addr] = append(m[addr], uint16(vbno)) + } + } + return m, nil +} + +// true if node is not on the bucket VBmap +func (b *Bucket) checkVBmap(node string) bool { + vbmap := b.VBServerMap() + servers := vbmap.ServerList + + for _, idxs := range vbmap.VBucketMap { + if len(idxs) == 0 { + return true + } else if idxs[0] < 0 || idxs[0] >= len(servers) { + return true + } + if servers[idxs[0]] == node { + return false + } + } + return true +} + +func (b *Bucket) GetName() string { + b.RLock() + defer b.RUnlock() + ret := b.Name + return ret +} + +// Nodes returns teh current list of nodes servicing this bucket. 
+func (b *Bucket) Nodes() []Node { + b.RLock() + defer b.RUnlock() + ret := *(*[]Node)(b.nodeList) + return ret +} + +// return the list of healthy nodes +func (b *Bucket) HealthyNodes() []Node { + nodes := []Node{} + + for _, n := range b.Nodes() { + if n.Status == "healthy" && n.CouchAPIBase != "" { + nodes = append(nodes, n) + } + if n.Status != "healthy" { // log non-healthy node + logging.Infof("Non-healthy node; node details:") + logging.Infof("Hostname=%v, Status=%v, CouchAPIBase=%v, ThisNode=%v", n.Hostname, n.Status, n.CouchAPIBase, n.ThisNode) + } + } + + return nodes +} + +func (b *Bucket) getConnPools(bucketLocked bool) []*connectionPool { + if !bucketLocked { + b.RLock() + defer b.RUnlock() + } + if b.connPools != nil { + return *(*[]*connectionPool)(b.connPools) + } else { + return nil + } +} + +func (b *Bucket) replaceConnPools(with []*connectionPool) { + b.Lock() + defer b.Unlock() + + old := b.connPools + b.connPools = unsafe.Pointer(&with) + if old != nil { + for _, pool := range *(*[]*connectionPool)(old) { + if pool != nil { + pool.Close() + } + } + } + return +} + +func (b *Bucket) getConnPool(i int) *connectionPool { + + if i < 0 { + return nil + } + + p := b.getConnPools(false /* not already locked */) + if len(p) > i { + return p[i] + } + + return nil +} + +func (b *Bucket) getConnPoolByHost(host string, bucketLocked bool) *connectionPool { + pools := b.getConnPools(bucketLocked) + for _, p := range pools { + if p != nil && p.host == host { + return p + } + } + + return nil +} + +// Given a vbucket number, returns a memcached connection to it. +// The connection must be returned to its pool after use. +func (b *Bucket) getConnectionToVBucket(vb uint32) (*memcached.Client, *connectionPool, error) { + for { + vbm := b.VBServerMap() + if len(vbm.VBucketMap) < int(vb) { + return nil, nil, fmt.Errorf("go-couchbase: vbmap smaller than vbucket list: %v vs. 
%v", + vb, vbm.VBucketMap) + } + masterId := vbm.VBucketMap[vb][0] + if masterId < 0 { + return nil, nil, fmt.Errorf("go-couchbase: No master for vbucket %d", vb) + } + pool := b.getConnPool(masterId) + conn, err := pool.Get() + if err != errClosedPool { + return conn, pool, err + } + // If conn pool was closed, because another goroutine refreshed the vbucket map, retry... + } +} + +// To get random documents, we need to cover all the nodes, so select +// a connection at random. + +func (b *Bucket) getRandomConnection() (*memcached.Client, *connectionPool, error) { + for { + var currentPool = 0 + pools := b.getConnPools(false /* not already locked */) + if len(pools) == 0 { + return nil, nil, fmt.Errorf("No connection pool found") + } else if len(pools) > 1 { // choose a random connection + currentPool = rand.Intn(len(pools)) + } // if only one pool, currentPool defaults to 0, i.e., the only pool + + // get the pool + pool := pools[currentPool] + conn, err := pool.Get() + if err != errClosedPool { + return conn, pool, err + } + + // If conn pool was closed, because another goroutine refreshed the vbucket map, retry... + } +} + +// +// Get a random document from a bucket. Since the bucket may be distributed +// across nodes, we must first select a random connection, and then use the +// Client.GetRandomDoc() call to get a random document from that node. 
+// + +func (b *Bucket) GetRandomDoc() (*gomemcached.MCResponse, error) { + // get a connection from the pool + conn, pool, err := b.getRandomConnection() + + if err != nil { + return nil, err + } + + // get a randomm document from the connection + doc, err := conn.GetRandomDoc() + // need to return the connection to the pool + pool.Return(conn) + return doc, err +} + +func (b *Bucket) getMasterNode(i int) string { + p := b.getConnPools(false /* not already locked */) + if len(p) > i { + return p[i].host + } + return "" +} + +func (b *Bucket) authHandler(bucketLocked bool) (ah AuthHandler) { + if !bucketLocked { + b.RLock() + defer b.RUnlock() + } + pool := b.pool + name := b.Name + + if pool != nil { + ah = pool.client.ah + } + if mbah, ok := ah.(MultiBucketAuthHandler); ok { + return mbah.ForBucket(name) + } + if ah == nil { + ah = &basicAuth{name, ""} + } + return +} + +// NodeAddresses gets the (sorted) list of memcached node addresses +// (hostname:port). +func (b *Bucket) NodeAddresses() []string { + vsm := b.VBServerMap() + rv := make([]string, len(vsm.ServerList)) + copy(rv, vsm.ServerList) + sort.Strings(rv) + return rv +} + +// CommonAddressSuffix finds the longest common suffix of all +// host:port strings in the node list. +func (b *Bucket) CommonAddressSuffix() string { + input := []string{} + for _, n := range b.Nodes() { + input = append(input, n.Hostname) + } + return FindCommonSuffix(input) +} + +// A Client is the starting point for all services across all buckets +// in a Couchbase cluster. 
+type Client struct { + BaseURL *url.URL + ah AuthHandler + Info Pools +} + +func maybeAddAuth(req *http.Request, ah AuthHandler) error { + if hah, ok := ah.(HTTPAuthHandler); ok { + return hah.SetCredsForRequest(req) + } + if ah != nil { + user, pass, _ := ah.GetCredentials() + req.Header.Set("Authorization", "Basic "+ + base64.StdEncoding.EncodeToString([]byte(user+":"+pass))) + } + return nil +} + +// arbitary number, may need to be tuned #FIXME +const HTTP_MAX_RETRY = 5 + +// Someday golang network packages will implement standard +// error codes. Until then #sigh +func isHttpConnError(err error) bool { + + estr := err.Error() + return strings.Contains(estr, "broken pipe") || + strings.Contains(estr, "broken connection") || + strings.Contains(estr, "connection reset") +} + +var client *http.Client + +func ClientConfigForX509(certFile, keyFile, rootFile string) (*tls.Config, error) { + cfg := &tls.Config{} + + if certFile != "" && keyFile != "" { + tlsCert, err := tls.LoadX509KeyPair(certFile, keyFile) + if err != nil { + return nil, err + } + cfg.Certificates = []tls.Certificate{tlsCert} + } else { + //error need to pass both certfile and keyfile + return nil, fmt.Errorf("N1QL: Need to pass both certfile and keyfile") + } + + var caCert []byte + var err1 error + + caCertPool := x509.NewCertPool() + if rootFile != "" { + // Read that value in + caCert, err1 = ioutil.ReadFile(rootFile) + if err1 != nil { + return nil, fmt.Errorf(" Error in reading cacert file, err: %v", err1) + } + caCertPool.AppendCertsFromPEM(caCert) + } + + cfg.RootCAs = caCertPool + return cfg, nil +} + +func doHTTPRequest(req *http.Request) (*http.Response, error) { + + var err error + var res *http.Response + + tr := &http.Transport{} + + // we need a client that ignores certificate errors, since we self-sign + // our certs + if client == nil && req.URL.Scheme == "https" { + if skipVerify { + tr = &http.Transport{ + TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, + } + } else { + // 
Handle cases with cert + + cfg, err := ClientConfigForX509(certFile, keyFile, rootFile) + if err != nil { + return nil, err + } + + tr = &http.Transport{ + TLSClientConfig: cfg, + } + } + + client = &http.Client{Transport: tr} + + } else if client == nil { + client = HTTPClient + } + + for i := 0; i < HTTP_MAX_RETRY; i++ { + res, err = client.Do(req) + if err != nil && isHttpConnError(err) { + continue + } + break + } + + if err != nil { + return nil, err + } + + return res, err +} + +func doPutAPI(baseURL *url.URL, path string, params map[string]interface{}, authHandler AuthHandler, out interface{}) error { + return doOutputAPI("PUT", baseURL, path, params, authHandler, out) +} + +func doPostAPI(baseURL *url.URL, path string, params map[string]interface{}, authHandler AuthHandler, out interface{}) error { + return doOutputAPI("POST", baseURL, path, params, authHandler, out) +} + +func doOutputAPI( + httpVerb string, + baseURL *url.URL, + path string, + params map[string]interface{}, + authHandler AuthHandler, + out interface{}) error { + + var requestUrl string + + if q := strings.Index(path, "?"); q > 0 { + requestUrl = baseURL.Scheme + "://" + baseURL.Host + path[:q] + "?" 
+ path[q+1:] + } else { + requestUrl = baseURL.Scheme + "://" + baseURL.Host + path + } + + postData := url.Values{} + for k, v := range params { + postData.Set(k, fmt.Sprintf("%v", v)) + } + + req, err := http.NewRequest(httpVerb, requestUrl, bytes.NewBufferString(postData.Encode())) + if err != nil { + return err + } + + req.Header.Add("Content-Type", "application/x-www-form-urlencoded") + + err = maybeAddAuth(req, authHandler) + if err != nil { + return err + } + + res, err := doHTTPRequest(req) + if err != nil { + return err + } + + defer res.Body.Close() + if res.StatusCode != 200 { + bod, _ := ioutil.ReadAll(io.LimitReader(res.Body, 512)) + return fmt.Errorf("HTTP error %v getting %q: %s", + res.Status, requestUrl, bod) + } + + d := json.NewDecoder(res.Body) + if err = d.Decode(&out); err != nil { + return err + } + return nil +} + +func queryRestAPI( + baseURL *url.URL, + path string, + authHandler AuthHandler, + out interface{}) error { + + var requestUrl string + + if q := strings.Index(path, "?"); q > 0 { + requestUrl = baseURL.Scheme + "://" + baseURL.Host + path[:q] + "?" 
+ path[q+1:] + } else { + requestUrl = baseURL.Scheme + "://" + baseURL.Host + path + } + + req, err := http.NewRequest("GET", requestUrl, nil) + if err != nil { + return err + } + + err = maybeAddAuth(req, authHandler) + if err != nil { + return err + } + + res, err := doHTTPRequest(req) + if err != nil { + return err + } + + defer res.Body.Close() + if res.StatusCode != 200 { + bod, _ := ioutil.ReadAll(io.LimitReader(res.Body, 512)) + return fmt.Errorf("HTTP error %v getting %q: %s", + res.Status, requestUrl, bod) + } + + d := json.NewDecoder(res.Body) + if err = d.Decode(&out); err != nil { + return err + } + return nil +} + +func (c *Client) ProcessStream(path string, callb func(interface{}) error, data interface{}) error { + return c.processStream(c.BaseURL, path, c.ah, callb, data) +} + +// Based on code in http://src.couchbase.org/source/xref/trunk/goproj/src/github.com/couchbase/indexing/secondary/dcp/pools.go#309 +func (c *Client) processStream(baseURL *url.URL, path string, authHandler AuthHandler, callb func(interface{}) error, data interface{}) error { + var requestUrl string + + if q := strings.Index(path, "?"); q > 0 { + requestUrl = baseURL.Scheme + "://" + baseURL.Host + path[:q] + "?" 
+ path[q+1:] + } else { + requestUrl = baseURL.Scheme + "://" + baseURL.Host + path + } + + req, err := http.NewRequest("GET", requestUrl, nil) + if err != nil { + return err + } + + err = maybeAddAuth(req, authHandler) + if err != nil { + return err + } + + res, err := doHTTPRequest(req) + if err != nil { + return err + } + + defer res.Body.Close() + if res.StatusCode != 200 { + bod, _ := ioutil.ReadAll(io.LimitReader(res.Body, 512)) + return fmt.Errorf("HTTP error %v getting %q: %s", + res.Status, requestUrl, bod) + } + + reader := bufio.NewReader(res.Body) + for { + bs, err := reader.ReadBytes('\n') + if err != nil { + return err + } + if len(bs) == 1 && bs[0] == '\n' { + continue + } + + err = json.Unmarshal(bs, data) + if err != nil { + return err + } + err = callb(data) + if err != nil { + return err + } + } + return nil + +} + +func (c *Client) parseURLResponse(path string, out interface{}) error { + return queryRestAPI(c.BaseURL, path, c.ah, out) +} + +func (c *Client) parsePostURLResponse(path string, params map[string]interface{}, out interface{}) error { + return doPostAPI(c.BaseURL, path, params, c.ah, out) +} + +func (c *Client) parsePutURLResponse(path string, params map[string]interface{}, out interface{}) error { + return doPutAPI(c.BaseURL, path, params, c.ah, out) +} + +func (b *Bucket) parseURLResponse(path string, out interface{}) error { + nodes := b.Nodes() + if len(nodes) == 0 { + return errors.New("no couch rest URLs") + } + + // Pick a random node to start querying. + startNode := rand.Intn(len(nodes)) + maxRetries := len(nodes) + for i := 0; i < maxRetries; i++ { + node := nodes[(startNode+i)%len(nodes)] // Wrap around the nodes list. + // Skip non-healthy nodes. + if node.Status != "healthy" || node.CouchAPIBase == "" { + continue + } + url := &url.URL{ + Host: node.Hostname, + Scheme: "http", + } + + // Lock here to avoid having pool closed under us. 
+ b.RLock() + err := queryRestAPI(url, path, b.pool.client.ah, out) + b.RUnlock() + if err == nil { + return err + } + } + return errors.New("All nodes failed to respond or no healthy nodes for bucket found") +} + +func (b *Bucket) parseAPIResponse(path string, out interface{}) error { + nodes := b.Nodes() + if len(nodes) == 0 { + return errors.New("no couch rest URLs") + } + + var err error + var u *url.URL + + // Pick a random node to start querying. + startNode := rand.Intn(len(nodes)) + maxRetries := len(nodes) + for i := 0; i < maxRetries; i++ { + node := nodes[(startNode+i)%len(nodes)] // Wrap around the nodes list. + // Skip non-healthy nodes. + if node.Status != "healthy" || node.CouchAPIBase == "" { + continue + } + + u, err = ParseURL(node.CouchAPIBase) + // Lock here so pool does not get closed under us. + b.RLock() + if err != nil { + b.RUnlock() + return fmt.Errorf("config error: Bucket %q node #%d CouchAPIBase=%q: %v", + b.Name, i, node.CouchAPIBase, err) + } else if b.pool != nil { + u.User = b.pool.client.BaseURL.User + } + u.Path = path + + // generate the path so that the strings are properly escaped + // MB-13770 + requestPath := strings.Split(u.String(), u.Host)[1] + + err = queryRestAPI(u, requestPath, b.pool.client.ah, out) + b.RUnlock() + if err == nil { + return err + } + } + + var errStr string + if err != nil { + errStr = "Error " + err.Error() + } + + return errors.New("All nodes failed to respond or returned error or no healthy nodes for bucket found." + errStr) +} + +type basicAuth struct { + u, p string +} + +func (b basicAuth) GetCredentials() (string, string, string) { + return b.u, b.p, b.u +} + +func basicAuthFromURL(us string) (ah AuthHandler) { + u, err := ParseURL(us) + if err != nil { + return + } + if user := u.User; user != nil { + pw, _ := user.Password() + ah = basicAuth{user.Username(), pw} + } + return +} + +// ConnectWithAuth connects to a couchbase cluster with the given +// authentication handler. 
+func ConnectWithAuth(baseU string, ah AuthHandler) (c Client, err error) { + c.BaseURL, err = ParseURL(baseU) + if err != nil { + return + } + c.ah = ah + + return c, c.parseURLResponse("/pools", &c.Info) +} + +// ConnectWithAuthCreds connects to a couchbase cluster with the give +// authorization creds returned by cb_auth +func ConnectWithAuthCreds(baseU, username, password string) (c Client, err error) { + c.BaseURL, err = ParseURL(baseU) + if err != nil { + return + } + + c.ah = newBucketAuth(username, password, "") + return c, c.parseURLResponse("/pools", &c.Info) + +} + +// Connect to a couchbase cluster. An authentication handler will be +// created from the userinfo in the URL if provided. +func Connect(baseU string) (Client, error) { + return ConnectWithAuth(baseU, basicAuthFromURL(baseU)) +} + +type BucketInfo struct { + Name string // name of bucket + Password string // SASL password of bucket +} + +//Get SASL buckets +func GetBucketList(baseU string) (bInfo []BucketInfo, err error) { + + c := &Client{} + c.BaseURL, err = ParseURL(baseU) + if err != nil { + return + } + c.ah = basicAuthFromURL(baseU) + + var buckets []Bucket + err = c.parseURLResponse("/pools/default/buckets", &buckets) + if err != nil { + return + } + bInfo = make([]BucketInfo, 0) + for _, bucket := range buckets { + bucketInfo := BucketInfo{Name: bucket.Name, Password: bucket.Password} + bInfo = append(bInfo, bucketInfo) + } + return bInfo, err +} + +//Set viewUpdateDaemonOptions +func SetViewUpdateParams(baseU string, params map[string]interface{}) (viewOpts map[string]interface{}, err error) { + + c := &Client{} + c.BaseURL, err = ParseURL(baseU) + if err != nil { + return + } + c.ah = basicAuthFromURL(baseU) + + if len(params) < 1 { + return nil, fmt.Errorf("No params to set") + } + + err = c.parsePostURLResponse("/settings/viewUpdateDaemon", params, &viewOpts) + if err != nil { + return + } + return viewOpts, err +} + +// This API lets the caller know, if the list of nodes a bucket 
is +// connected to has gone through an edit (a rebalance operation) +// since the last update to the bucket, in which case a Refresh is +// advised. +func (b *Bucket) NodeListChanged() bool { + b.RLock() + pool := b.pool + uri := b.URI + b.RUnlock() + + tmpb := &Bucket{} + err := pool.client.parseURLResponse(uri, tmpb) + if err != nil { + return true + } + + bNodes := *(*[]Node)(b.nodeList) + if len(bNodes) != len(tmpb.NodesJSON) { + return true + } + + bucketHostnames := map[string]bool{} + for _, node := range bNodes { + bucketHostnames[node.Hostname] = true + } + + for _, node := range tmpb.NodesJSON { + if _, found := bucketHostnames[node.Hostname]; !found { + return true + } + } + + return false +} + +// Sample data for scopes and collections as returned from the +// /pooles/default/$BUCKET_NAME/collections API. +// {"myScope2":{"myCollectionC":{}},"myScope1":{"myCollectionB":{},"myCollectionA":{}},"_default":{"_default":{}}} + +// A Scopes holds the set of scopes in a bucket. +// The map key is the name of the scope. +type Scopes map[string]Collections + +// A Collections holds the set of collections in a scope. +// The map key is the name of the collection. +type Collections map[string]Collection + +// A Collection holds the information for a collection. +// It is currently returned empty. +type Collection struct{} + +func getScopesAndCollections(pool *Pool, bucketName string) (Scopes, error) { + scopes := make(Scopes) + // This URL is a bit of a hack. The "default" is the name of the pool, and should + // be a parameter. But the name does not appear to be available anywhere, + // and in any case we never use a pool other than "default". 
+ err := pool.client.parseURLResponse(fmt.Sprintf("/pools/default/buckets/%s/collections", bucketName), &scopes) + if err != nil { + return nil, err + } + return scopes, nil +} + +func (b *Bucket) Refresh() error { + b.RLock() + pool := b.pool + uri := b.URI + name := b.Name + b.RUnlock() + + tmpb := &Bucket{} + err := pool.client.parseURLResponse(uri, tmpb) + if err != nil { + return err + } + + scopes, err := getScopesAndCollections(pool, name) + if err != nil { + return err + } + + pools := b.getConnPools(false /* bucket not already locked */) + + // We need this lock to ensure that bucket refreshes happening because + // of NMVb errors received during bulkGet do not end up over-writing + // pool.inUse. + b.Lock() + + for _, pool := range pools { + if pool != nil { + pool.inUse = false + } + } + + newcps := make([]*connectionPool, len(tmpb.VBSMJson.ServerList)) + for i := range newcps { + + pool := b.getConnPoolByHost(tmpb.VBSMJson.ServerList[i], true /* bucket already locked */) + if pool != nil && pool.inUse == false { + // if the hostname and index is unchanged then reuse this pool + newcps[i] = pool + pool.inUse = true + continue + } + + if b.ah != nil { + newcps[i] = newConnectionPool( + tmpb.VBSMJson.ServerList[i], + b.ah, AsynchronousCloser, PoolSize, PoolOverflow) + + } else { + newcps[i] = newConnectionPool( + tmpb.VBSMJson.ServerList[i], + b.authHandler(true /* bucket already locked */), + AsynchronousCloser, PoolSize, PoolOverflow) + } + } + b.replaceConnPools2(newcps, true /* bucket already locked */) + tmpb.ah = b.ah + b.vBucketServerMap = unsafe.Pointer(&tmpb.VBSMJson) + b.nodeList = unsafe.Pointer(&tmpb.NodesJSON) + b.Scopes = scopes + + b.Unlock() + return nil +} + +func (p *Pool) refresh() (err error) { + p.BucketMap = make(map[string]Bucket) + + buckets := []Bucket{} + err = p.client.parseURLResponse(p.BucketURL["uri"], &buckets) + if err != nil { + return err + } + for _, b := range buckets { + b.pool = p + b.nodeList = 
unsafe.Pointer(&b.NodesJSON) + b.replaceConnPools(make([]*connectionPool, len(b.VBSMJson.ServerList))) + + p.BucketMap[b.Name] = b + } + return nil +} + +// GetPool gets a pool from within the couchbase cluster (usually +// "default"). +func (c *Client) GetPool(name string) (p Pool, err error) { + var poolURI string + for _, p := range c.Info.Pools { + if p.Name == name { + poolURI = p.URI + } + } + if poolURI == "" { + return p, errors.New("No pool named " + name) + } + + err = c.parseURLResponse(poolURI, &p) + + p.client = *c + + err = p.refresh() + return +} + +// GetPoolServices returns all the bucket-independent services in a pool. +// (See "Exposing services outside of bucket context" in http://goo.gl/uuXRkV) +func (c *Client) GetPoolServices(name string) (ps PoolServices, err error) { + var poolName string + for _, p := range c.Info.Pools { + if p.Name == name { + poolName = p.Name + } + } + if poolName == "" { + return ps, errors.New("No pool named " + name) + } + + poolURI := "/pools/" + poolName + "/nodeServices" + err = c.parseURLResponse(poolURI, &ps) + + return +} + +// Close marks this bucket as no longer needed, closing connections it +// may have open. +func (b *Bucket) Close() { + b.Lock() + defer b.Unlock() + if b.connPools != nil { + for _, c := range b.getConnPools(true /* already locked */) { + if c != nil { + c.Close() + } + } + b.connPools = nil + } +} + +func bucketFinalizer(b *Bucket) { + if b.connPools != nil { + logging.Warnf("Finalizing a bucket with active connections.") + } +} + +// GetBucket gets a bucket from within this pool. +func (p *Pool) GetBucket(name string) (*Bucket, error) { + rv, ok := p.BucketMap[name] + if !ok { + return nil, &BucketNotFoundError{bucket: name} + } + runtime.SetFinalizer(&rv, bucketFinalizer) + err := rv.Refresh() + if err != nil { + return nil, err + } + return &rv, nil +} + +// GetBucket gets a bucket from within this pool. 
+func (p *Pool) GetBucketWithAuth(bucket, username, password string) (*Bucket, error) { + rv, ok := p.BucketMap[bucket] + if !ok { + return nil, &BucketNotFoundError{bucket: bucket} + } + runtime.SetFinalizer(&rv, bucketFinalizer) + rv.ah = newBucketAuth(username, password, bucket) + err := rv.Refresh() + if err != nil { + return nil, err + } + return &rv, nil +} + +// GetPool gets the pool to which this bucket belongs. +func (b *Bucket) GetPool() *Pool { + b.RLock() + defer b.RUnlock() + ret := b.pool + return ret +} + +// GetClient gets the client from which we got this pool. +func (p *Pool) GetClient() *Client { + return &p.client +} + +// GetBucket is a convenience function for getting a named bucket from +// a URL +func GetBucket(endpoint, poolname, bucketname string) (*Bucket, error) { + var err error + client, err := Connect(endpoint) + if err != nil { + return nil, err + } + + pool, err := client.GetPool(poolname) + if err != nil { + return nil, err + } + + return pool.GetBucket(bucketname) +} + +// ConnectWithAuthAndGetBucket is a convenience function for +// getting a named bucket from a given URL and an auth callback +func ConnectWithAuthAndGetBucket(endpoint, poolname, bucketname string, + ah AuthHandler) (*Bucket, error) { + client, err := ConnectWithAuth(endpoint, ah) + if err != nil { + return nil, err + } + + pool, err := client.GetPool(poolname) + if err != nil { + return nil, err + } + + return pool.GetBucket(bucketname) +} diff --git a/vendor/github.com/couchbaselabs/go-couchbase/streaming.go b/vendor/github.com/couchbaselabs/go-couchbase/streaming.go new file mode 100644 index 0000000000..6467635371 --- /dev/null +++ b/vendor/github.com/couchbaselabs/go-couchbase/streaming.go @@ -0,0 +1,209 @@ +package couchbase + +import ( + "encoding/json" + "fmt" + "github.com/couchbase/goutils/logging" + "io" + "io/ioutil" + "math/rand" + "net" + "net/http" + "time" + "unsafe" +) + +// Bucket auto-updater gets the latest version of the bucket config from +// 
the server. If the configuration has changed then updated the local +// bucket information. If the bucket has been deleted then notify anyone +// who is holding a reference to this bucket + +const MAX_RETRY_COUNT = 5 +const DISCONNECT_PERIOD = 120 * time.Second + +type NotifyFn func(bucket string, err error) + +// Use TCP keepalive to detect half close sockets +var updaterTransport http.RoundTripper = &http.Transport{ + Proxy: http.ProxyFromEnvironment, + Dial: (&net.Dialer{ + Timeout: 30 * time.Second, + KeepAlive: 30 * time.Second, + }).Dial, +} + +var updaterHTTPClient = &http.Client{Transport: updaterTransport} + +func doHTTPRequestForUpdate(req *http.Request) (*http.Response, error) { + + var err error + var res *http.Response + + for i := 0; i < HTTP_MAX_RETRY; i++ { + res, err = updaterHTTPClient.Do(req) + if err != nil && isHttpConnError(err) { + continue + } + break + } + + if err != nil { + return nil, err + } + + return res, err +} + +func (b *Bucket) RunBucketUpdater(notify NotifyFn) { + go func() { + err := b.UpdateBucket() + if err != nil { + if notify != nil { + notify(b.GetName(), err) + } + logging.Errorf(" Bucket Updater exited with err %v", err) + } + }() +} + +func (b *Bucket) replaceConnPools2(with []*connectionPool, bucketLocked bool) { + if !bucketLocked { + b.Lock() + defer b.Unlock() + } + old := b.connPools + b.connPools = unsafe.Pointer(&with) + if old != nil { + for _, pool := range *(*[]*connectionPool)(old) { + if pool != nil && pool.inUse == false { + pool.Close() + } + } + } + return +} + +func (b *Bucket) UpdateBucket() error { + + var failures int + var returnErr error + + for { + + if failures == MAX_RETRY_COUNT { + logging.Errorf(" Maximum failures reached. Exiting loop...") + return fmt.Errorf("Max failures reached. 
Last Error %v", returnErr) + } + + nodes := b.Nodes() + if len(nodes) < 1 { + return fmt.Errorf("No healthy nodes found") + } + + startNode := rand.Intn(len(nodes)) + node := nodes[(startNode)%len(nodes)] + + streamUrl := fmt.Sprintf("http://%s/pools/default/bucketsStreaming/%s", node.Hostname, b.GetName()) + logging.Infof(" Trying with %s", streamUrl) + req, err := http.NewRequest("GET", streamUrl, nil) + if err != nil { + return err + } + + b.RLock() + pool := b.pool + bucketName := b.Name + b.RUnlock() + scopes, err := getScopesAndCollections(pool, bucketName) + if err != nil { + return err + } + + // Lock here to avoid having pool closed under us. + b.RLock() + err = maybeAddAuth(req, b.pool.client.ah) + b.RUnlock() + if err != nil { + return err + } + + res, err := doHTTPRequestForUpdate(req) + if err != nil { + return err + } + + if res.StatusCode != 200 { + bod, _ := ioutil.ReadAll(io.LimitReader(res.Body, 512)) + logging.Errorf("Failed to connect to host, unexpected status code: %v. Body %s", res.StatusCode, bod) + res.Body.Close() + returnErr = fmt.Errorf("Failed to connect to host. 
Status %v Body %s", res.StatusCode, bod) + failures++ + continue + } + + dec := json.NewDecoder(res.Body) + + tmpb := &Bucket{} + for { + + err := dec.Decode(&tmpb) + if err != nil { + returnErr = err + res.Body.Close() + break + } + + // if we got here, reset failure count + failures = 0 + b.Lock() + + // mark all the old connection pools for deletion + pools := b.getConnPools(true /* already locked */) + for _, pool := range pools { + if pool != nil { + pool.inUse = false + } + } + + newcps := make([]*connectionPool, len(tmpb.VBSMJson.ServerList)) + for i := range newcps { + // get the old connection pool and check if it is still valid + pool := b.getConnPoolByHost(tmpb.VBSMJson.ServerList[i], true /* bucket already locked */) + if pool != nil && pool.inUse == false { + // if the hostname and index is unchanged then reuse this pool + newcps[i] = pool + pool.inUse = true + continue + } + // else create a new pool + if b.ah != nil { + newcps[i] = newConnectionPool( + tmpb.VBSMJson.ServerList[i], + b.ah, false, PoolSize, PoolOverflow) + + } else { + newcps[i] = newConnectionPool( + tmpb.VBSMJson.ServerList[i], + b.authHandler(true /* bucket already locked */), + false, PoolSize, PoolOverflow) + } + } + + b.replaceConnPools2(newcps, true /* bucket already locked */) + + tmpb.ah = b.ah + b.vBucketServerMap = unsafe.Pointer(&tmpb.VBSMJson) + b.nodeList = unsafe.Pointer(&tmpb.NodesJSON) + b.Scopes = scopes + b.Unlock() + + logging.Infof("Got new configuration for bucket %s", b.GetName()) + + } + // we are here because of an error + failures++ + continue + + } + return nil +} diff --git a/vendor/github.com/couchbaselabs/go-couchbase/tap.go b/vendor/github.com/couchbaselabs/go-couchbase/tap.go new file mode 100644 index 0000000000..86edd30554 --- /dev/null +++ b/vendor/github.com/couchbaselabs/go-couchbase/tap.go @@ -0,0 +1,143 @@ +package couchbase + +import ( + "github.com/couchbase/gomemcached/client" + "github.com/couchbase/goutils/logging" + "sync" + "time" +) + 
+const initialRetryInterval = 1 * time.Second +const maximumRetryInterval = 30 * time.Second + +// A TapFeed streams mutation events from a bucket. +// +// Events from the bucket can be read from the channel 'C'. Remember +// to call Close() on it when you're done, unless its channel has +// closed itself already. +type TapFeed struct { + C <-chan memcached.TapEvent + + bucket *Bucket + args *memcached.TapArguments + nodeFeeds []*memcached.TapFeed // The TAP feeds of the individual nodes + output chan memcached.TapEvent // Same as C but writeably-typed + wg sync.WaitGroup + quit chan bool +} + +// StartTapFeed creates and starts a new Tap feed +func (b *Bucket) StartTapFeed(args *memcached.TapArguments) (*TapFeed, error) { + if args == nil { + defaultArgs := memcached.DefaultTapArguments() + args = &defaultArgs + } + + feed := &TapFeed{ + bucket: b, + args: args, + output: make(chan memcached.TapEvent, 10), + quit: make(chan bool), + } + + go feed.run() + + feed.C = feed.output + return feed, nil +} + +// Goroutine that runs the feed +func (feed *TapFeed) run() { + retryInterval := initialRetryInterval + bucketOK := true + for { + // Connect to the TAP feed of each server node: + if bucketOK { + killSwitch, err := feed.connectToNodes() + if err == nil { + // Run until one of the sub-feeds fails: + select { + case <-killSwitch: + case <-feed.quit: + return + } + feed.closeNodeFeeds() + retryInterval = initialRetryInterval + } + } + + // On error, try to refresh the bucket in case the list of nodes changed: + logging.Infof("go-couchbase: TAP connection lost; reconnecting to bucket %q in %v", + feed.bucket.Name, retryInterval) + err := feed.bucket.Refresh() + bucketOK = err == nil + + select { + case <-time.After(retryInterval): + case <-feed.quit: + return + } + if retryInterval *= 2; retryInterval > maximumRetryInterval { + retryInterval = maximumRetryInterval + } + } +} + +func (feed *TapFeed) connectToNodes() (killSwitch chan bool, err error) { + killSwitch = 
make(chan bool) + for _, serverConn := range feed.bucket.getConnPools(false /* not already locked */) { + var singleFeed *memcached.TapFeed + singleFeed, err = serverConn.StartTapFeed(feed.args) + if err != nil { + logging.Errorf("go-couchbase: Error connecting to tap feed of %s: %v", serverConn.host, err) + feed.closeNodeFeeds() + return + } + feed.nodeFeeds = append(feed.nodeFeeds, singleFeed) + go feed.forwardTapEvents(singleFeed, killSwitch, serverConn.host) + feed.wg.Add(1) + } + return +} + +// Goroutine that forwards Tap events from a single node's feed to the aggregate feed. +func (feed *TapFeed) forwardTapEvents(singleFeed *memcached.TapFeed, killSwitch chan bool, host string) { + defer feed.wg.Done() + for { + select { + case event, ok := <-singleFeed.C: + if !ok { + if singleFeed.Error != nil { + logging.Errorf("go-couchbase: Tap feed from %s failed: %v", host, singleFeed.Error) + } + killSwitch <- true + return + } + feed.output <- event + case <-feed.quit: + return + } + } +} + +func (feed *TapFeed) closeNodeFeeds() { + for _, f := range feed.nodeFeeds { + f.Close() + } + feed.nodeFeeds = nil +} + +// Close a Tap feed. +func (feed *TapFeed) Close() error { + select { + case <-feed.quit: + return nil + default: + } + + feed.closeNodeFeeds() + close(feed.quit) + feed.wg.Wait() + close(feed.output) + return nil +} diff --git a/vendor/github.com/couchbaselabs/go-couchbase/upr.go b/vendor/github.com/couchbaselabs/go-couchbase/upr.go new file mode 100644 index 0000000000..bf1b209b7e --- /dev/null +++ b/vendor/github.com/couchbaselabs/go-couchbase/upr.go @@ -0,0 +1,398 @@ +package couchbase + +import ( + "log" + "sync" + "time" + + "fmt" + "github.com/couchbase/gomemcached" + "github.com/couchbase/gomemcached/client" + "github.com/couchbase/goutils/logging" +) + +// A UprFeed streams mutation events from a bucket. +// +// Events from the bucket can be read from the channel 'C'. 
Remember +// to call Close() on it when you're done, unless its channel has +// closed itself already. +type UprFeed struct { + C <-chan *memcached.UprEvent + + bucket *Bucket + nodeFeeds map[string]*FeedInfo // The UPR feeds of the individual nodes + output chan *memcached.UprEvent // Same as C but writeably-typed + outputClosed bool + quit chan bool + name string // name of this UPR feed + sequence uint32 // sequence number for this feed + connected bool + killSwitch chan bool + closing bool + wg sync.WaitGroup + dcp_buffer_size uint32 + data_chan_size int +} + +// UprFeed from a single connection +type FeedInfo struct { + uprFeed *memcached.UprFeed // UPR feed handle + host string // hostname + connected bool // connected + quit chan bool // quit channel +} + +type FailoverLog map[uint16]memcached.FailoverLog + +// GetFailoverLogs, get the failover logs for a set of vbucket ids +func (b *Bucket) GetFailoverLogs(vBuckets []uint16) (FailoverLog, error) { + + // map vbids to their corresponding hosts + vbHostList := make(map[string][]uint16) + vbm := b.VBServerMap() + if len(vbm.VBucketMap) < len(vBuckets) { + return nil, fmt.Errorf("vbmap smaller than vbucket list: %v vs. 
%v", + vbm.VBucketMap, vBuckets) + } + + for _, vb := range vBuckets { + masterID := vbm.VBucketMap[vb][0] + master := b.getMasterNode(masterID) + if master == "" { + return nil, fmt.Errorf("No master found for vb %d", vb) + } + + vbList := vbHostList[master] + if vbList == nil { + vbList = make([]uint16, 0) + } + vbList = append(vbList, vb) + vbHostList[master] = vbList + } + + failoverLogMap := make(FailoverLog) + for _, serverConn := range b.getConnPools(false /* not already locked */) { + + vbList := vbHostList[serverConn.host] + if vbList == nil { + continue + } + + mc, err := serverConn.Get() + if err != nil { + logging.Infof("No Free connections for vblist %v", vbList) + return nil, fmt.Errorf("No Free connections for host %s", + serverConn.host) + + } + // close the connection so that it doesn't get reused for upr data + // connection + defer mc.Close() + failoverlogs, err := mc.UprGetFailoverLog(vbList) + if err != nil { + return nil, fmt.Errorf("Error getting failover log %s host %s", + err.Error(), serverConn.host) + + } + + for vb, log := range failoverlogs { + failoverLogMap[vb] = *log + } + } + + return failoverLogMap, nil +} + +func (b *Bucket) StartUprFeed(name string, sequence uint32) (*UprFeed, error) { + return b.StartUprFeedWithConfig(name, sequence, 10, DEFAULT_WINDOW_SIZE) +} + +// StartUprFeed creates and starts a new Upr feed +// No data will be sent on the channel unless vbuckets streams are requested +func (b *Bucket) StartUprFeedWithConfig(name string, sequence uint32, data_chan_size int, dcp_buffer_size uint32) (*UprFeed, error) { + + feed := &UprFeed{ + bucket: b, + output: make(chan *memcached.UprEvent, data_chan_size), + quit: make(chan bool), + nodeFeeds: make(map[string]*FeedInfo, 0), + name: name, + sequence: sequence, + killSwitch: make(chan bool), + dcp_buffer_size: dcp_buffer_size, + data_chan_size: data_chan_size, + } + + err := feed.connectToNodes() + if err != nil { + return nil, fmt.Errorf("Cannot connect to bucket %s", 
err.Error()) + } + feed.connected = true + go feed.run() + + feed.C = feed.output + return feed, nil +} + +// UprRequestStream starts a stream for a vb on a feed +func (feed *UprFeed) UprRequestStream(vb uint16, opaque uint16, flags uint32, + vuuid, startSequence, endSequence, snapStart, snapEnd uint64) error { + + defer func() { + if r := recover(); r != nil { + log.Panicf("Panic in UprRequestStream. Feed %v Bucket %v", feed, feed.bucket) + } + }() + + vbm := feed.bucket.VBServerMap() + if len(vbm.VBucketMap) < int(vb) { + return fmt.Errorf("vbmap smaller than vbucket list: %v vs. %v", + vb, vbm.VBucketMap) + } + + if int(vb) >= len(vbm.VBucketMap) { + return fmt.Errorf("Invalid vbucket id %d", vb) + } + + masterID := vbm.VBucketMap[vb][0] + master := feed.bucket.getMasterNode(masterID) + if master == "" { + return fmt.Errorf("Master node not found for vbucket %d", vb) + } + singleFeed := feed.nodeFeeds[master] + if singleFeed == nil { + return fmt.Errorf("UprFeed for this host not found") + } + + if err := singleFeed.uprFeed.UprRequestStream(vb, opaque, flags, + vuuid, startSequence, endSequence, snapStart, snapEnd); err != nil { + return err + } + + return nil +} + +// UprCloseStream ends a vbucket stream. +func (feed *UprFeed) UprCloseStream(vb, opaqueMSB uint16) error { + + defer func() { + if r := recover(); r != nil { + log.Panicf("Panic in UprCloseStream. Feed %v Bucket %v ", feed, feed.bucket) + } + }() + + vbm := feed.bucket.VBServerMap() + if len(vbm.VBucketMap) < int(vb) { + return fmt.Errorf("vbmap smaller than vbucket list: %v vs. 
%v", + vb, vbm.VBucketMap) + } + + if int(vb) >= len(vbm.VBucketMap) { + return fmt.Errorf("Invalid vbucket id %d", vb) + } + + masterID := vbm.VBucketMap[vb][0] + master := feed.bucket.getMasterNode(masterID) + if master == "" { + return fmt.Errorf("Master node not found for vbucket %d", vb) + } + singleFeed := feed.nodeFeeds[master] + if singleFeed == nil { + return fmt.Errorf("UprFeed for this host not found") + } + + if err := singleFeed.uprFeed.CloseStream(vb, opaqueMSB); err != nil { + return err + } + return nil +} + +// Goroutine that runs the feed +func (feed *UprFeed) run() { + retryInterval := initialRetryInterval + bucketOK := true + for { + // Connect to the UPR feed of each server node: + if bucketOK { + // Run until one of the sub-feeds fails: + select { + case <-feed.killSwitch: + case <-feed.quit: + return + } + //feed.closeNodeFeeds() + retryInterval = initialRetryInterval + } + + if feed.closing == true { + // we have been asked to shut down + return + } + + // On error, try to refresh the bucket in case the list of nodes changed: + logging.Infof("go-couchbase: UPR connection lost; reconnecting to bucket %q in %v", + feed.bucket.Name, retryInterval) + + if err := feed.bucket.Refresh(); err != nil { + // if we fail to refresh the bucket, exit the feed + // MB-14917 + logging.Infof("Unable to refresh bucket %s ", err.Error()) + close(feed.output) + feed.outputClosed = true + feed.closeNodeFeeds() + return + } + + // this will only connect to nodes that are not connected or changed + // user will have to reconnect the stream + err := feed.connectToNodes() + if err != nil { + logging.Infof("Unable to connect to nodes..exit ") + close(feed.output) + feed.outputClosed = true + feed.closeNodeFeeds() + return + } + bucketOK = err == nil + + select { + case <-time.After(retryInterval): + case <-feed.quit: + return + } + if retryInterval *= 2; retryInterval > maximumRetryInterval { + retryInterval = maximumRetryInterval + } + } +} + +func (feed *UprFeed) 
connectToNodes() (err error) { + nodeCount := 0 + for _, serverConn := range feed.bucket.getConnPools(false /* not already locked */) { + + // this maybe a reconnection, so check if the connection to the node + // already exists. Connect only if the node is not found in the list + // or connected == false + nodeFeed := feed.nodeFeeds[serverConn.host] + + if nodeFeed != nil && nodeFeed.connected == true { + continue + } + + var singleFeed *memcached.UprFeed + var name string + if feed.name == "" { + name = "DefaultUprClient" + } else { + name = feed.name + } + singleFeed, err = serverConn.StartUprFeed(name, feed.sequence, feed.dcp_buffer_size, feed.data_chan_size) + if err != nil { + logging.Errorf("go-couchbase: Error connecting to upr feed of %s: %v", serverConn.host, err) + feed.closeNodeFeeds() + return + } + // add the node to the connection map + feedInfo := &FeedInfo{ + uprFeed: singleFeed, + connected: true, + host: serverConn.host, + quit: make(chan bool), + } + feed.nodeFeeds[serverConn.host] = feedInfo + go feed.forwardUprEvents(feedInfo, feed.killSwitch, serverConn.host) + feed.wg.Add(1) + nodeCount++ + } + if nodeCount == 0 { + return fmt.Errorf("No connection to bucket") + } + + return nil +} + +// Goroutine that forwards Upr events from a single node's feed to the aggregate feed. +func (feed *UprFeed) forwardUprEvents(nodeFeed *FeedInfo, killSwitch chan bool, host string) { + singleFeed := nodeFeed.uprFeed + + defer func() { + feed.wg.Done() + if r := recover(); r != nil { + //if feed is not closing, re-throw the panic + if feed.outputClosed != true && feed.closing != true { + panic(r) + } else { + logging.Errorf("Panic is recovered. 
Since feed is closed, exit gracefully") + + } + } + }() + + for { + select { + case <-nodeFeed.quit: + nodeFeed.connected = false + return + + case event, ok := <-singleFeed.C: + if !ok { + if singleFeed.Error != nil { + logging.Errorf("go-couchbase: Upr feed from %s failed: %v", host, singleFeed.Error) + } + killSwitch <- true + return + } + if feed.outputClosed == true { + // someone closed the node feed + logging.Infof("Node need closed, returning from forwardUprEvent") + return + } + feed.output <- event + if event.Status == gomemcached.NOT_MY_VBUCKET { + logging.Infof(" Got a not my vbucket error !! ") + if err := feed.bucket.Refresh(); err != nil { + logging.Errorf("Unable to refresh bucket %s ", err.Error()) + feed.closeNodeFeeds() + return + } + // this will only connect to nodes that are not connected or changed + // user will have to reconnect the stream + if err := feed.connectToNodes(); err != nil { + logging.Errorf("Unable to connect to nodes %s", err.Error()) + return + } + + } + } + } +} + +func (feed *UprFeed) closeNodeFeeds() { + for _, f := range feed.nodeFeeds { + logging.Infof(" Sending close to forwardUprEvent ") + close(f.quit) + f.uprFeed.Close() + } + feed.nodeFeeds = nil +} + +// Close a Upr feed. 
+func (feed *UprFeed) Close() error { + select { + case <-feed.quit: + return nil + default: + } + + feed.closing = true + feed.closeNodeFeeds() + close(feed.quit) + + feed.wg.Wait() + if feed.outputClosed == false { + feed.outputClosed = true + close(feed.output) + } + + return nil +} diff --git a/vendor/github.com/couchbaselabs/go-couchbase/users.go b/vendor/github.com/couchbaselabs/go-couchbase/users.go new file mode 100644 index 0000000000..47d4861522 --- /dev/null +++ b/vendor/github.com/couchbaselabs/go-couchbase/users.go @@ -0,0 +1,119 @@ +package couchbase + +import ( + "bytes" + "fmt" +) + +type User struct { + Name string + Id string + Domain string + Roles []Role +} + +type Role struct { + Role string + BucketName string `json:"bucket_name"` +} + +// Sample: +// {"role":"admin","name":"Admin","desc":"Can manage ALL cluster features including security.","ce":true} +// {"role":"query_select","bucket_name":"*","name":"Query Select","desc":"Can execute SELECT statement on bucket to retrieve data"} +type RoleDescription struct { + Role string + Name string + Desc string + Ce bool + BucketName string `json:"bucket_name"` +} + +// Return user-role data, as parsed JSON. +// Sample: +// [{"id":"ivanivanov","name":"Ivan Ivanov","roles":[{"role":"cluster_admin"},{"bucket_name":"default","role":"bucket_admin"}]}, +// {"id":"petrpetrov","name":"Petr Petrov","roles":[{"role":"replication_admin"}]}] +func (c *Client) GetUserRoles() ([]interface{}, error) { + ret := make([]interface{}, 0, 1) + err := c.parseURLResponse("/settings/rbac/users", &ret) + if err != nil { + return nil, err + } + + // Get the configured administrator. + // Expected result: {"port":8091,"username":"Administrator"} + adminInfo := make(map[string]interface{}, 2) + err = c.parseURLResponse("/settings/web", &adminInfo) + if err != nil { + return nil, err + } + + // Create a special entry for the configured administrator. 
+ adminResult := map[string]interface{}{ + "name": adminInfo["username"], + "id": adminInfo["username"], + "domain": "ns_server", + "roles": []interface{}{ + map[string]interface{}{ + "role": "admin", + }, + }, + } + + // Add the configured administrator to the list of results. + ret = append(ret, adminResult) + + return ret, nil +} + +func (c *Client) GetUserInfoAll() ([]User, error) { + ret := make([]User, 0, 16) + err := c.parseURLResponse("/settings/rbac/users", &ret) + if err != nil { + return nil, err + } + return ret, nil +} + +func rolesToParamFormat(roles []Role) string { + var buffer bytes.Buffer + for i, role := range roles { + if i > 0 { + buffer.WriteString(",") + } + buffer.WriteString(role.Role) + if role.BucketName != "" { + buffer.WriteString("[") + buffer.WriteString(role.BucketName) + buffer.WriteString("]") + } + } + return buffer.String() +} + +func (c *Client) PutUserInfo(u *User) error { + params := map[string]interface{}{ + "name": u.Name, + "roles": rolesToParamFormat(u.Roles), + } + var target string + switch u.Domain { + case "external": + target = "/settings/rbac/users/" + u.Id + case "local": + target = "/settings/rbac/users/local/" + u.Id + default: + return fmt.Errorf("Unknown user type: %s", u.Domain) + } + var ret string // PUT returns an empty string. We ignore it. 
+ err := c.parsePutURLResponse(target, params, &ret) + return err +} + +func (c *Client) GetRolesAll() ([]RoleDescription, error) { + ret := make([]RoleDescription, 0, 32) + err := c.parseURLResponse("/settings/rbac/roles", &ret) + if err != nil { + return nil, err + } + return ret, nil +} diff --git a/vendor/github.com/couchbaselabs/go-couchbase/util.go b/vendor/github.com/couchbaselabs/go-couchbase/util.go new file mode 100644 index 0000000000..4d286a3271 --- /dev/null +++ b/vendor/github.com/couchbaselabs/go-couchbase/util.go @@ -0,0 +1,49 @@ +package couchbase + +import ( + "fmt" + "net/url" + "strings" +) + +// CleanupHost returns the hostname with the given suffix removed. +func CleanupHost(h, commonSuffix string) string { + if strings.HasSuffix(h, commonSuffix) { + return h[:len(h)-len(commonSuffix)] + } + return h +} + +// FindCommonSuffix returns the longest common suffix from the given +// strings. +func FindCommonSuffix(input []string) string { + rv := "" + if len(input) < 2 { + return "" + } + from := input + for i := len(input[0]); i > 0; i-- { + common := true + suffix := input[0][i:] + for _, s := range from { + if !strings.HasSuffix(s, suffix) { + common = false + break + } + } + if common { + rv = suffix + } + } + return rv +} + +// ParseURL is a wrapper around url.Parse with some sanity-checking +func ParseURL(urlStr string) (result *url.URL, err error) { + result, err = url.Parse(urlStr) + if result != nil && result.Scheme == "" { + result = nil + err = fmt.Errorf("invalid URL <%s>", urlStr) + } + return +} diff --git a/vendor/github.com/couchbaselabs/go-couchbase/vbmap.go b/vendor/github.com/couchbaselabs/go-couchbase/vbmap.go new file mode 100644 index 0000000000..b96a18ed57 --- /dev/null +++ b/vendor/github.com/couchbaselabs/go-couchbase/vbmap.go @@ -0,0 +1,77 @@ +package couchbase + +var crc32tab = []uint32{ + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, + 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3, + 0x0edb8832, 0x79dcb8a4, 
0xe0d5e91e, 0x97d2d988, + 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, + 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, + 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, + 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, + 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5, + 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, + 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, + 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, + 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, + 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, + 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f, + 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, + 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a, + 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, + 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, + 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, + 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, + 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, + 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c, + 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, + 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, + 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, + 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, + 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, + 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086, + 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, + 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, + 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, + 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, + 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, + 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, + 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, + 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7, + 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, + 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, + 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, + 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, + 0xd80d2bda, 0xaf0a1b4c, 
0x36034af6, 0x41047a60, + 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79, + 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, + 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, + 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, + 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, + 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, + 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, + 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, + 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e, + 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, + 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, + 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, + 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, + 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, + 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0, + 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, + 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf, + 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, + 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d} + +// VBHash finds the vbucket for the given key. +func (b *Bucket) VBHash(key string) uint32 { + crc := uint32(0xffffffff) + for x := 0; x < len(key); x++ { + crc = (crc >> 8) ^ crc32tab[(uint64(crc)^uint64(key[x]))&0xff] + } + vbm := b.VBServerMap() + return ((^crc) >> 16) & 0x7fff & (uint32(len(vbm.VBucketMap)) - 1) +} diff --git a/vendor/github.com/couchbaselabs/go-couchbase/views.go b/vendor/github.com/couchbaselabs/go-couchbase/views.go new file mode 100644 index 0000000000..2f68642f5a --- /dev/null +++ b/vendor/github.com/couchbaselabs/go-couchbase/views.go @@ -0,0 +1,231 @@ +package couchbase + +import ( + "encoding/json" + "errors" + "fmt" + "io/ioutil" + "math/rand" + "net/http" + "net/url" + "time" +) + +// ViewRow represents a single result from a view. +// +// Doc is present only if include_docs was set on the request. 
+type ViewRow struct { + ID string + Key interface{} + Value interface{} + Doc *interface{} +} + +// A ViewError is a node-specific error indicating a partial failure +// within a view result. +type ViewError struct { + From string + Reason string +} + +func (ve ViewError) Error() string { + return "Node: " + ve.From + ", reason: " + ve.Reason +} + +// ViewResult holds the entire result set from a view request, +// including the rows and the errors. +type ViewResult struct { + TotalRows int `json:"total_rows"` + Rows []ViewRow + Errors []ViewError +} + +func (b *Bucket) randomBaseURL() (*url.URL, error) { + nodes := b.HealthyNodes() + if len(nodes) == 0 { + return nil, errors.New("no available couch rest URLs") + } + nodeNo := rand.Intn(len(nodes)) + node := nodes[nodeNo] + + b.RLock() + name := b.Name + pool := b.pool + b.RUnlock() + + u, err := ParseURL(node.CouchAPIBase) + if err != nil { + return nil, fmt.Errorf("config error: Bucket %q node #%d CouchAPIBase=%q: %v", + name, nodeNo, node.CouchAPIBase, err) + } else if pool != nil { + u.User = pool.client.BaseURL.User + } + return u, err +} + +const START_NODE_ID = -1 + +func (b *Bucket) randomNextURL(lastNode int) (*url.URL, int, error) { + nodes := b.HealthyNodes() + if len(nodes) == 0 { + return nil, -1, errors.New("no available couch rest URLs") + } + + var nodeNo int + if lastNode == START_NODE_ID || lastNode >= len(nodes) { + // randomly select a node if the value of lastNode is invalid + nodeNo = rand.Intn(len(nodes)) + } else { + // wrap around the node list + nodeNo = (lastNode + 1) % len(nodes) + } + + b.RLock() + name := b.Name + pool := b.pool + b.RUnlock() + + node := nodes[nodeNo] + u, err := ParseURL(node.CouchAPIBase) + if err != nil { + return nil, -1, fmt.Errorf("config error: Bucket %q node #%d CouchAPIBase=%q: %v", + name, nodeNo, node.CouchAPIBase, err) + } else if pool != nil { + u.User = pool.client.BaseURL.User + } + return u, nodeNo, err +} + +// DocID is the document ID type for the 
startkey_docid parameter in +// views. +type DocID string + +func qParam(k, v string) string { + format := `"%s"` + switch k { + case "startkey_docid", "endkey_docid", "stale": + format = "%s" + } + return fmt.Sprintf(format, v) +} + +// ViewURL constructs a URL for a view with the given ddoc, view name, +// and parameters. +func (b *Bucket) ViewURL(ddoc, name string, + params map[string]interface{}) (string, error) { + u, err := b.randomBaseURL() + if err != nil { + return "", err + } + + values := url.Values{} + for k, v := range params { + switch t := v.(type) { + case DocID: + values[k] = []string{string(t)} + case string: + values[k] = []string{qParam(k, t)} + case int: + values[k] = []string{fmt.Sprintf(`%d`, t)} + case bool: + values[k] = []string{fmt.Sprintf(`%v`, t)} + default: + b, err := json.Marshal(v) + if err != nil { + return "", fmt.Errorf("unsupported value-type %T in Query, "+ + "json encoder said %v", t, err) + } + values[k] = []string{fmt.Sprintf(`%v`, string(b))} + } + } + + if ddoc == "" && name == "_all_docs" { + u.Path = fmt.Sprintf("/%s/_all_docs", b.GetName()) + } else { + u.Path = fmt.Sprintf("/%s/_design/%s/_view/%s", b.GetName(), ddoc, name) + } + u.RawQuery = values.Encode() + + return u.String(), nil +} + +// ViewCallback is called for each view invocation. +var ViewCallback func(ddoc, name string, start time.Time, err error) + +// ViewCustom performs a view request that can map row values to a +// custom type. +// +// See the source to View for an example usage. 
+func (b *Bucket) ViewCustom(ddoc, name string, params map[string]interface{}, + vres interface{}) (err error) { + if SlowServerCallWarningThreshold > 0 { + defer slowLog(time.Now(), "call to ViewCustom(%q, %q)", ddoc, name) + } + + if ViewCallback != nil { + defer func(t time.Time) { ViewCallback(ddoc, name, t, err) }(time.Now()) + } + + u, err := b.ViewURL(ddoc, name, params) + if err != nil { + return err + } + + req, err := http.NewRequest("GET", u, nil) + if err != nil { + return err + } + + ah := b.authHandler(false /* bucket not yet locked */) + maybeAddAuth(req, ah) + + res, err := doHTTPRequest(req) + if err != nil { + return fmt.Errorf("error starting view req at %v: %v", u, err) + } + defer res.Body.Close() + + if res.StatusCode != 200 { + bod := make([]byte, 512) + l, _ := res.Body.Read(bod) + return fmt.Errorf("error executing view req at %v: %v - %s", + u, res.Status, bod[:l]) + } + + body, err := ioutil.ReadAll(res.Body) + if err := json.Unmarshal(body, vres); err != nil { + return nil + } + + return nil +} + +// View executes a view. +// +// The ddoc parameter is just the bare name of your design doc without +// the "_design/" prefix. +// +// Parameters are string keys with values that correspond to couchbase +// view parameters. Primitive should work fairly naturally (booleans, +// ints, strings, etc...) and other values will attempt to be JSON +// marshaled (useful for array indexing on on view keys, for example). +// +// Example: +// +// res, err := couchbase.View("myddoc", "myview", map[string]interface{}{ +// "group_level": 2, +// "startkey_docid": []interface{}{"thing"}, +// "endkey_docid": []interface{}{"thing", map[string]string{}}, +// "stale": false, +// }) +func (b *Bucket) View(ddoc, name string, params map[string]interface{}) (ViewResult, error) { + vres := ViewResult{} + + if err := b.ViewCustom(ddoc, name, params, &vres); err != nil { + //error in accessing views. 
Retry once after a bucket refresh + b.Refresh() + return vres, b.ViewCustom(ddoc, name, params, &vres) + } else { + return vres, nil + } +} diff --git a/vendor/github.com/go-macaron/session/couchbase/couchbase.go b/vendor/github.com/go-macaron/session/couchbase/couchbase.go new file mode 100644 index 0000000000..8001fd15f1 --- /dev/null +++ b/vendor/github.com/go-macaron/session/couchbase/couchbase.go @@ -0,0 +1,228 @@ +// Copyright 2013 Beego Authors +// Copyright 2014 The Macaron Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"): you may +// not use this file except in compliance with the License. You may obtain +// a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations +// under the License. + +package session + +import ( + "strings" + "sync" + + "github.com/couchbaselabs/go-couchbase" + + "github.com/go-macaron/session" +) + +// CouchbaseSessionStore represents a couchbase session store implementation. +type CouchbaseSessionStore struct { + b *couchbase.Bucket + sid string + lock sync.RWMutex + data map[interface{}]interface{} + maxlifetime int64 +} + +// Set sets value to given key in session. +func (s *CouchbaseSessionStore) Set(key, val interface{}) error { + s.lock.Lock() + defer s.lock.Unlock() + + s.data[key] = val + return nil +} + +// Get gets value by given key in session. +func (s *CouchbaseSessionStore) Get(key interface{}) interface{} { + s.lock.RLock() + defer s.lock.RUnlock() + + return s.data[key] +} + +// Delete delete a key from session. 
+func (s *CouchbaseSessionStore) Delete(key interface{}) error { + s.lock.Lock() + defer s.lock.Unlock() + + delete(s.data, key) + return nil +} + +// ID returns current session ID. +func (s *CouchbaseSessionStore) ID() string { + return s.sid +} + +// Release releases resource and save data to provider. +func (s *CouchbaseSessionStore) Release() error { + defer s.b.Close() + + // Skip encoding if the data is empty + if len(s.data) == 0 { + return nil + } + + data, err := session.EncodeGob(s.data) + if err != nil { + return err + } + + return s.b.Set(s.sid, int(s.maxlifetime), data) +} + +// Flush deletes all session data. +func (s *CouchbaseSessionStore) Flush() error { + s.lock.Lock() + defer s.lock.Unlock() + + s.data = make(map[interface{}]interface{}) + return nil +} + +// CouchbaseProvider represents a couchbase session provider implementation. +type CouchbaseProvider struct { + maxlifetime int64 + connStr string + pool string + bucket string + b *couchbase.Bucket +} + +func (cp *CouchbaseProvider) getBucket() *couchbase.Bucket { + c, err := couchbase.Connect(cp.connStr) + if err != nil { + return nil + } + + pool, err := c.GetPool(cp.pool) + if err != nil { + return nil + } + + bucket, err := pool.GetBucket(cp.bucket) + if err != nil { + return nil + } + + return bucket +} + +// Init initializes memory session provider. +// connStr is couchbase server REST/JSON URL +// e.g. http://host:port/, Pool, Bucket +func (p *CouchbaseProvider) Init(maxlifetime int64, connStr string) error { + p.maxlifetime = maxlifetime + configs := strings.Split(connStr, ",") + if len(configs) > 0 { + p.connStr = configs[0] + } + if len(configs) > 1 { + p.pool = configs[1] + } + if len(configs) > 2 { + p.bucket = configs[2] + } + + return nil +} + +// Read returns raw session store by session ID. 
+func (p *CouchbaseProvider) Read(sid string) (session.RawStore, error) { + p.b = p.getBucket() + + var doc []byte + + err := p.b.Get(sid, &doc) + var kv map[interface{}]interface{} + if doc == nil { + kv = make(map[interface{}]interface{}) + } else { + kv, err = session.DecodeGob(doc) + if err != nil { + return nil, err + } + } + + cs := &CouchbaseSessionStore{b: p.b, sid: sid, data: kv, maxlifetime: p.maxlifetime} + return cs, nil +} + +// Exist returns true if session with given ID exists. +func (p *CouchbaseProvider) Exist(sid string) bool { + p.b = p.getBucket() + defer p.b.Close() + + var doc []byte + + if err := p.b.Get(sid, &doc); err != nil || doc == nil { + return false + } else { + return true + } +} + +// Destory deletes a session by session ID. +func (p *CouchbaseProvider) Destory(sid string) error { + p.b = p.getBucket() + defer p.b.Close() + + p.b.Delete(sid) + return nil +} + +// Regenerate regenerates a session store from old session ID to new one. +func (p *CouchbaseProvider) Regenerate(oldsid, sid string) (session.RawStore, error) { + p.b = p.getBucket() + + var doc []byte + if err := p.b.Get(oldsid, &doc); err != nil || doc == nil { + p.b.Set(sid, int(p.maxlifetime), "") + } else { + err := p.b.Delete(oldsid) + if err != nil { + return nil, err + } + _, _ = p.b.Add(sid, int(p.maxlifetime), doc) + } + + err := p.b.Get(sid, &doc) + if err != nil { + return nil, err + } + var kv map[interface{}]interface{} + if doc == nil { + kv = make(map[interface{}]interface{}) + } else { + kv, err = session.DecodeGob(doc) + if err != nil { + return nil, err + } + } + + cs := &CouchbaseSessionStore{b: p.b, sid: sid, data: kv, maxlifetime: p.maxlifetime} + return cs, nil +} + +// Count counts and returns number of sessions. +func (p *CouchbaseProvider) Count() int { + // FIXME + return 0 +} + +// GC calls GC to clean expired sessions. 
+func (p *CouchbaseProvider) GC() {} + +func init() { + session.Register("couchbase", &CouchbaseProvider{}) +} diff --git a/vendor/github.com/go-macaron/session/file.go b/vendor/github.com/go-macaron/session/file.go index 9bbc7aed20..64b47f2b00 100644 --- a/vendor/github.com/go-macaron/session/file.go +++ b/vendor/github.com/go-macaron/session/file.go @@ -81,6 +81,11 @@ func (s *FileStore) Release() error { s.p.lock.Lock() defer s.p.lock.Unlock() + // Skip encoding if the data is empty + if len(s.data) == 0 { + return nil + } + data, err := EncodeGob(s.data) if err != nil { return err diff --git a/vendor/github.com/go-macaron/session/flash.go b/vendor/github.com/go-macaron/session/flash.go new file mode 100644 index 0000000000..99aae71e22 --- /dev/null +++ b/vendor/github.com/go-macaron/session/flash.go @@ -0,0 +1,61 @@ +// Copyright 2018 The Macaron Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"): you may +// not use this file except in compliance with the License. You may obtain +// a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations +// under the License. + +package session + +import ( + "net/url" + + "gopkg.in/macaron.v1" +) + +type Flash struct { + ctx *macaron.Context + url.Values + ErrorMsg, WarningMsg, InfoMsg, SuccessMsg string +} + +func (f *Flash) set(name, msg string, current ...bool) { + isShow := false + if (len(current) == 0 && macaron.FlashNow) || + (len(current) > 0 && current[0]) { + isShow = true + } + + if isShow { + f.ctx.Data["Flash"] = f + } else { + f.Set(name, msg) + } +} + +func (f *Flash) Error(msg string, current ...bool) { + f.ErrorMsg = msg + f.set("error", msg, current...) 
+} + +func (f *Flash) Warning(msg string, current ...bool) { + f.WarningMsg = msg + f.set("warning", msg, current...) +} + +func (f *Flash) Info(msg string, current ...bool) { + f.InfoMsg = msg + f.set("info", msg, current...) +} + +func (f *Flash) Success(msg string, current ...bool) { + f.SuccessMsg = msg + f.set("success", msg, current...) +} diff --git a/vendor/github.com/go-macaron/session/memcache/memcache.go b/vendor/github.com/go-macaron/session/memcache/memcache.go new file mode 100644 index 0000000000..496939398b --- /dev/null +++ b/vendor/github.com/go-macaron/session/memcache/memcache.go @@ -0,0 +1,204 @@ +// Copyright 2013 Beego Authors +// Copyright 2014 The Macaron Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"): you may +// not use this file except in compliance with the License. You may obtain +// a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations +// under the License. + +package session + +import ( + "fmt" + "strings" + "sync" + + "github.com/bradfitz/gomemcache/memcache" + + "github.com/go-macaron/session" +) + +// MemcacheStore represents a memcache session store implementation. +type MemcacheStore struct { + c *memcache.Client + sid string + expire int32 + lock sync.RWMutex + data map[interface{}]interface{} +} + +// NewMemcacheStore creates and returns a memcache session store. 
+func NewMemcacheStore(c *memcache.Client, sid string, expire int32, kv map[interface{}]interface{}) *MemcacheStore { + return &MemcacheStore{ + c: c, + sid: sid, + expire: expire, + data: kv, + } +} + +func NewItem(sid string, data []byte, expire int32) *memcache.Item { + return &memcache.Item{ + Key: sid, + Value: data, + Expiration: expire, + } +} + +// Set sets value to given key in session. +func (s *MemcacheStore) Set(key, val interface{}) error { + s.lock.Lock() + defer s.lock.Unlock() + + s.data[key] = val + return nil +} + +// Get gets value by given key in session. +func (s *MemcacheStore) Get(key interface{}) interface{} { + s.lock.RLock() + defer s.lock.RUnlock() + + return s.data[key] +} + +// Delete delete a key from session. +func (s *MemcacheStore) Delete(key interface{}) error { + s.lock.Lock() + defer s.lock.Unlock() + + delete(s.data, key) + return nil +} + +// ID returns current session ID. +func (s *MemcacheStore) ID() string { + return s.sid +} + +// Release releases resource and save data to provider. +func (s *MemcacheStore) Release() error { + // Skip encoding if the data is empty + if len(s.data) == 0 { + return nil + } + + data, err := session.EncodeGob(s.data) + if err != nil { + return err + } + + return s.c.Set(NewItem(s.sid, data, s.expire)) +} + +// Flush deletes all session data. +func (s *MemcacheStore) Flush() error { + s.lock.Lock() + defer s.lock.Unlock() + + s.data = make(map[interface{}]interface{}) + return nil +} + +// MemcacheProvider represents a memcache session provider implementation. +type MemcacheProvider struct { + c *memcache.Client + expire int32 +} + +// Init initializes memcache session provider. +// connStrs: 127.0.0.1:9090;127.0.0.1:9091 +func (p *MemcacheProvider) Init(expire int64, connStrs string) error { + p.expire = int32(expire) + p.c = memcache.New(strings.Split(connStrs, ";")...) + return nil +} + +// Read returns raw session store by session ID. 
+func (p *MemcacheProvider) Read(sid string) (session.RawStore, error) { + if !p.Exist(sid) { + if err := p.c.Set(NewItem(sid, []byte(""), p.expire)); err != nil { + return nil, err + } + } + + var kv map[interface{}]interface{} + item, err := p.c.Get(sid) + if err != nil { + return nil, err + } + if len(item.Value) == 0 { + kv = make(map[interface{}]interface{}) + } else { + kv, err = session.DecodeGob(item.Value) + if err != nil { + return nil, err + } + } + + return NewMemcacheStore(p.c, sid, p.expire, kv), nil +} + +// Exist returns true if session with given ID exists. +func (p *MemcacheProvider) Exist(sid string) bool { + _, err := p.c.Get(sid) + return err == nil +} + +// Destory deletes a session by session ID. +func (p *MemcacheProvider) Destory(sid string) error { + return p.c.Delete(sid) +} + +// Regenerate regenerates a session store from old session ID to new one. +func (p *MemcacheProvider) Regenerate(oldsid, sid string) (_ session.RawStore, err error) { + if p.Exist(sid) { + return nil, fmt.Errorf("new sid '%s' already exists", sid) + } + + item := NewItem(sid, []byte(""), p.expire) + if p.Exist(oldsid) { + item, err = p.c.Get(oldsid) + if err != nil { + return nil, err + } else if err = p.c.Delete(oldsid); err != nil { + return nil, err + } + item.Key = sid + } + if err = p.c.Set(item); err != nil { + return nil, err + } + + var kv map[interface{}]interface{} + if len(item.Value) == 0 { + kv = make(map[interface{}]interface{}) + } else { + kv, err = session.DecodeGob(item.Value) + if err != nil { + return nil, err + } + } + + return NewMemcacheStore(p.c, sid, p.expire, kv), nil +} + +// Count counts and returns number of sessions. +func (p *MemcacheProvider) Count() int { + // FIXME: how come this library does not have Stats method? + return -1 +} + +// GC calls GC to clean expired sessions. 
+func (p *MemcacheProvider) GC() {} + +func init() { + session.Register("memcache", &MemcacheProvider{}) +} diff --git a/vendor/github.com/go-macaron/session/mysql/mysql.go b/vendor/github.com/go-macaron/session/mysql/mysql.go new file mode 100644 index 0000000000..7bde37445e --- /dev/null +++ b/vendor/github.com/go-macaron/session/mysql/mysql.go @@ -0,0 +1,200 @@ +// Copyright 2013 Beego Authors +// Copyright 2014 The Macaron Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"): you may +// not use this file except in compliance with the License. You may obtain +// a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations +// under the License. + +package session + +import ( + "database/sql" + "fmt" + "log" + "sync" + "time" + + _ "github.com/go-sql-driver/mysql" + + "github.com/go-macaron/session" +) + +// MysqlStore represents a mysql session store implementation. +type MysqlStore struct { + c *sql.DB + sid string + lock sync.RWMutex + data map[interface{}]interface{} +} + +// NewMysqlStore creates and returns a mysql session store. +func NewMysqlStore(c *sql.DB, sid string, kv map[interface{}]interface{}) *MysqlStore { + return &MysqlStore{ + c: c, + sid: sid, + data: kv, + } +} + +// Set sets value to given key in session. +func (s *MysqlStore) Set(key, val interface{}) error { + s.lock.Lock() + defer s.lock.Unlock() + + s.data[key] = val + return nil +} + +// Get gets value by given key in session. +func (s *MysqlStore) Get(key interface{}) interface{} { + s.lock.RLock() + defer s.lock.RUnlock() + + return s.data[key] +} + +// Delete delete a key from session. 
+func (s *MysqlStore) Delete(key interface{}) error { + s.lock.Lock() + defer s.lock.Unlock() + + delete(s.data, key) + return nil +} + +// ID returns current session ID. +func (s *MysqlStore) ID() string { + return s.sid +} + +// Release releases resource and save data to provider. +func (s *MysqlStore) Release() error { + // Skip encoding if the data is empty + if len(s.data) == 0 { + return nil + } + + data, err := session.EncodeGob(s.data) + if err != nil { + return err + } + + _, err = s.c.Exec("UPDATE session SET data=?, expiry=? WHERE `key`=?", + data, time.Now().Unix(), s.sid) + return err +} + +// Flush deletes all session data. +func (s *MysqlStore) Flush() error { + s.lock.Lock() + defer s.lock.Unlock() + + s.data = make(map[interface{}]interface{}) + return nil +} + +// MysqlProvider represents a mysql session provider implementation. +type MysqlProvider struct { + c *sql.DB + expire int64 +} + +// Init initializes mysql session provider. +// connStr: username:password@protocol(address)/dbname?param=value +func (p *MysqlProvider) Init(expire int64, connStr string) (err error) { + p.expire = expire + + p.c, err = sql.Open("mysql", connStr) + if err != nil { + return err + } + return p.c.Ping() +} + +// Read returns raw session store by session ID. +func (p *MysqlProvider) Read(sid string) (session.RawStore, error) { + var data []byte + err := p.c.QueryRow("SELECT data FROM session WHERE `key`=?", sid).Scan(&data) + if err == sql.ErrNoRows { + _, err = p.c.Exec("INSERT INTO session(`key`,data,expiry) VALUES(?,?,?)", + sid, "", time.Now().Unix()) + } + if err != nil { + return nil, err + } + + var kv map[interface{}]interface{} + if len(data) == 0 { + kv = make(map[interface{}]interface{}) + } else { + kv, err = session.DecodeGob(data) + if err != nil { + return nil, err + } + } + + return NewMysqlStore(p.c, sid, kv), nil +} + +// Exist returns true if session with given ID exists. 
+func (p *MysqlProvider) Exist(sid string) bool { + var data []byte + err := p.c.QueryRow("SELECT data FROM session WHERE `key`=?", sid).Scan(&data) + if err != nil && err != sql.ErrNoRows { + panic("session/mysql: error checking existence: " + err.Error()) + } + return err != sql.ErrNoRows +} + +// Destory deletes a session by session ID. +func (p *MysqlProvider) Destory(sid string) error { + _, err := p.c.Exec("DELETE FROM session WHERE `key`=?", sid) + return err +} + +// Regenerate regenerates a session store from old session ID to new one. +func (p *MysqlProvider) Regenerate(oldsid, sid string) (_ session.RawStore, err error) { + if p.Exist(sid) { + return nil, fmt.Errorf("new sid '%s' already exists", sid) + } + + if !p.Exist(oldsid) { + if _, err = p.c.Exec("INSERT INTO session(`key`,data,expiry) VALUES(?,?,?)", + oldsid, "", time.Now().Unix()); err != nil { + return nil, err + } + } + + if _, err = p.c.Exec("UPDATE session SET `key`=? WHERE `key`=?", sid, oldsid); err != nil { + return nil, err + } + + return p.Read(sid) +} + +// Count counts and returns number of sessions. +func (p *MysqlProvider) Count() (total int) { + if err := p.c.QueryRow("SELECT COUNT(*) AS NUM FROM session").Scan(&total); err != nil { + panic("session/mysql: error counting records: " + err.Error()) + } + return total +} + +// GC calls GC to clean expired sessions. +func (p *MysqlProvider) GC() { + if _, err := p.c.Exec("DELETE FROM session WHERE expiry + ? 
<= UNIX_TIMESTAMP(NOW())", p.expire); err != nil { + log.Printf("session/mysql: error garbage collecting: %v", err) + } +} + +func init() { + session.Register("mysql", &MysqlProvider{}) +} diff --git a/vendor/github.com/go-macaron/session/nodb/nodb.go b/vendor/github.com/go-macaron/session/nodb/nodb.go new file mode 100644 index 0000000000..8b5a711693 --- /dev/null +++ b/vendor/github.com/go-macaron/session/nodb/nodb.go @@ -0,0 +1,208 @@ +// Copyright 2015 The Macaron Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"): you may +// not use this file except in compliance with the License. You may obtain +// a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations +// under the License. + +package session + +import ( + "fmt" + "sync" + + "github.com/lunny/nodb" + "github.com/lunny/nodb/config" + + "github.com/go-macaron/session" +) + +// NodbStore represents a nodb session store implementation. +type NodbStore struct { + c *nodb.DB + sid string + expire int64 + lock sync.RWMutex + data map[interface{}]interface{} +} + +// NewNodbStore creates and returns a ledis session store. +func NewNodbStore(c *nodb.DB, sid string, expire int64, kv map[interface{}]interface{}) *NodbStore { + return &NodbStore{ + c: c, + expire: expire, + sid: sid, + data: kv, + } +} + +// Set sets value to given key in session. +func (s *NodbStore) Set(key, val interface{}) error { + s.lock.Lock() + defer s.lock.Unlock() + + s.data[key] = val + return nil +} + +// Get gets value by given key in session. 
+func (s *NodbStore) Get(key interface{}) interface{} { + s.lock.RLock() + defer s.lock.RUnlock() + + return s.data[key] +} + +// Delete delete a key from session. +func (s *NodbStore) Delete(key interface{}) error { + s.lock.Lock() + defer s.lock.Unlock() + + delete(s.data, key) + return nil +} + +// ID returns current session ID. +func (s *NodbStore) ID() string { + return s.sid +} + +// Release releases resource and save data to provider. +func (s *NodbStore) Release() error { + // Skip encoding if the data is empty + if len(s.data) == 0 { + return nil + } + + data, err := session.EncodeGob(s.data) + if err != nil { + return err + } + + if err = s.c.Set([]byte(s.sid), data); err != nil { + return err + } + _, err = s.c.Expire([]byte(s.sid), s.expire) + return err +} + +// Flush deletes all session data. +func (s *NodbStore) Flush() error { + s.lock.Lock() + defer s.lock.Unlock() + + s.data = make(map[interface{}]interface{}) + return nil +} + +// NodbProvider represents a ledis session provider implementation. +type NodbProvider struct { + c *nodb.DB + expire int64 +} + +// Init initializes nodb session provider. +func (p *NodbProvider) Init(expire int64, configs string) error { + p.expire = expire + + cfg := new(config.Config) + cfg.DataDir = configs + dbs, err := nodb.Open(cfg) + if err != nil { + return fmt.Errorf("session/nodb: error opening db: %v", err) + } + + p.c, err = dbs.Select(0) + return err +} + +// Read returns raw session store by session ID. 
+func (p *NodbProvider) Read(sid string) (session.RawStore, error) { + if !p.Exist(sid) { + if err := p.c.Set([]byte(sid), []byte("")); err != nil { + return nil, err + } + } + + var kv map[interface{}]interface{} + kvs, err := p.c.Get([]byte(sid)) + if err != nil { + return nil, err + } + if len(kvs) == 0 { + kv = make(map[interface{}]interface{}) + } else { + kv, err = session.DecodeGob(kvs) + if err != nil { + return nil, err + } + } + + return NewNodbStore(p.c, sid, p.expire, kv), nil +} + +// Exist returns true if session with given ID exists. +func (p *NodbProvider) Exist(sid string) bool { + count, err := p.c.Exists([]byte(sid)) + return err == nil && count > 0 +} + +// Destory deletes a session by session ID. +func (p *NodbProvider) Destory(sid string) error { + _, err := p.c.Del([]byte(sid)) + return err +} + +// Regenerate regenerates a session store from old session ID to new one. +func (p *NodbProvider) Regenerate(oldsid, sid string) (_ session.RawStore, err error) { + if p.Exist(sid) { + return nil, fmt.Errorf("new sid '%s' already exists", sid) + } + + kvs := make([]byte, 0) + if p.Exist(oldsid) { + if kvs, err = p.c.Get([]byte(oldsid)); err != nil { + return nil, err + } else if _, err = p.c.Del([]byte(oldsid)); err != nil { + return nil, err + } + } + + if err = p.c.Set([]byte(sid), kvs); err != nil { + return nil, err + } else if _, err = p.c.Expire([]byte(sid), p.expire); err != nil { + return nil, err + } + + var kv map[interface{}]interface{} + if len(kvs) == 0 { + kv = make(map[interface{}]interface{}) + } else { + kv, err = session.DecodeGob([]byte(kvs)) + if err != nil { + return nil, err + } + } + + return NewNodbStore(p.c, sid, p.expire, kv), nil +} + +// Count counts and returns number of sessions. +func (p *NodbProvider) Count() int { + // FIXME: how come this library does not have DbSize() method? + return -1 +} + +// GC calls GC to clean expired sessions. 
+func (p *NodbProvider) GC() {} + +func init() { + session.Register("nodb", &NodbProvider{}) +} diff --git a/vendor/github.com/go-macaron/session/postgres/postgres.go b/vendor/github.com/go-macaron/session/postgres/postgres.go new file mode 100644 index 0000000000..f1e034b501 --- /dev/null +++ b/vendor/github.com/go-macaron/session/postgres/postgres.go @@ -0,0 +1,201 @@ +// Copyright 2013 Beego Authors +// Copyright 2014 The Macaron Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"): you may +// not use this file except in compliance with the License. You may obtain +// a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations +// under the License. + +package session + +import ( + "database/sql" + "fmt" + "log" + "sync" + "time" + + _ "github.com/lib/pq" + + "github.com/go-macaron/session" +) + +// PostgresStore represents a postgres session store implementation. +type PostgresStore struct { + c *sql.DB + sid string + lock sync.RWMutex + data map[interface{}]interface{} +} + +// NewPostgresStore creates and returns a postgres session store. +func NewPostgresStore(c *sql.DB, sid string, kv map[interface{}]interface{}) *PostgresStore { + return &PostgresStore{ + c: c, + sid: sid, + data: kv, + } +} + +// Set sets value to given key in session. +func (s *PostgresStore) Set(key, value interface{}) error { + s.lock.Lock() + defer s.lock.Unlock() + + s.data[key] = value + return nil +} + +// Get gets value by given key in session. +func (s *PostgresStore) Get(key interface{}) interface{} { + s.lock.RLock() + defer s.lock.RUnlock() + + return s.data[key] +} + +// Delete delete a key from session. 
+func (s *PostgresStore) Delete(key interface{}) error { + s.lock.Lock() + defer s.lock.Unlock() + + delete(s.data, key) + return nil +} + +// ID returns current session ID. +func (s *PostgresStore) ID() string { + return s.sid +} + +// save postgres session values to database. +// must call this method to save values to database. +func (s *PostgresStore) Release() error { + // Skip encoding if the data is empty + if len(s.data) == 0 { + return nil + } + + data, err := session.EncodeGob(s.data) + if err != nil { + return err + } + + _, err = s.c.Exec("UPDATE session SET data=$1, expiry=$2 WHERE key=$3", + data, time.Now().Unix(), s.sid) + return err +} + +// Flush deletes all session data. +func (s *PostgresStore) Flush() error { + s.lock.Lock() + defer s.lock.Unlock() + + s.data = make(map[interface{}]interface{}) + return nil +} + +// PostgresProvider represents a postgres session provider implementation. +type PostgresProvider struct { + c *sql.DB + maxlifetime int64 +} + +// Init initializes postgres session provider. +// connStr: user=a password=b host=localhost port=5432 dbname=c sslmode=disable +func (p *PostgresProvider) Init(maxlifetime int64, connStr string) (err error) { + p.maxlifetime = maxlifetime + + p.c, err = sql.Open("postgres", connStr) + if err != nil { + return err + } + return p.c.Ping() +} + +// Read returns raw session store by session ID. 
+func (p *PostgresProvider) Read(sid string) (session.RawStore, error) { + var data []byte + err := p.c.QueryRow("SELECT data FROM session WHERE key=$1", sid).Scan(&data) + if err == sql.ErrNoRows { + _, err = p.c.Exec("INSERT INTO session(key,data,expiry) VALUES($1,$2,$3)", + sid, "", time.Now().Unix()) + } + if err != nil { + return nil, err + } + + var kv map[interface{}]interface{} + if len(data) == 0 { + kv = make(map[interface{}]interface{}) + } else { + kv, err = session.DecodeGob(data) + if err != nil { + return nil, err + } + } + + return NewPostgresStore(p.c, sid, kv), nil +} + +// Exist returns true if session with given ID exists. +func (p *PostgresProvider) Exist(sid string) bool { + var data []byte + err := p.c.QueryRow("SELECT data FROM session WHERE key=$1", sid).Scan(&data) + if err != nil && err != sql.ErrNoRows { + panic("session/postgres: error checking existence: " + err.Error()) + } + return err != sql.ErrNoRows +} + +// Destory deletes a session by session ID. +func (p *PostgresProvider) Destory(sid string) error { + _, err := p.c.Exec("DELETE FROM session WHERE key=$1", sid) + return err +} + +// Regenerate regenerates a session store from old session ID to new one. +func (p *PostgresProvider) Regenerate(oldsid, sid string) (_ session.RawStore, err error) { + if p.Exist(sid) { + return nil, fmt.Errorf("new sid '%s' already exists", sid) + } + + if !p.Exist(oldsid) { + if _, err = p.c.Exec("INSERT INTO session(key,data,expiry) VALUES($1,$2,$3)", + oldsid, "", time.Now().Unix()); err != nil { + return nil, err + } + } + + if _, err = p.c.Exec("UPDATE session SET key=$1 WHERE key=$2", sid, oldsid); err != nil { + return nil, err + } + + return p.Read(sid) +} + +// Count counts and returns number of sessions. 
+func (p *PostgresProvider) Count() (total int) { + if err := p.c.QueryRow("SELECT COUNT(*) AS NUM FROM session").Scan(&total); err != nil { + panic("session/postgres: error counting records: " + err.Error()) + } + return total +} + +// GC calls GC to clean expired sessions. +func (p *PostgresProvider) GC() { + if _, err := p.c.Exec("DELETE FROM session WHERE EXTRACT(EPOCH FROM NOW()) - expiry > $1", p.maxlifetime); err != nil { + log.Printf("session/postgres: error garbage collecting: %v", err) + } +} + +func init() { + session.Register("postgres", &PostgresProvider{}) +} diff --git a/vendor/github.com/go-macaron/session/redis/redis.go b/vendor/github.com/go-macaron/session/redis/redis.go index ca1cf88de6..2d7fe98405 100644 --- a/vendor/github.com/go-macaron/session/redis/redis.go +++ b/vendor/github.com/go-macaron/session/redis/redis.go @@ -81,6 +81,11 @@ func (s *RedisStore) ID() string { // Release releases resource and save data to provider. func (s *RedisStore) Release() error { + // Skip encoding if the data is empty + if len(s.data) == 0 { + return nil + } + data, err := session.EncodeGob(s.data) if err != nil { return err @@ -153,7 +158,7 @@ func (p *RedisProvider) Init(maxlifetime int64, configs string) (err error) { func (p *RedisProvider) Read(sid string) (session.RawStore, error) { psid := p.prefix + sid if !p.Exist(sid) { - if err := p.c.Set(psid, "").Err(); err != nil { + if err := p.c.SetEx(psid, p.duration, "").Err(); err != nil { return nil, err } } diff --git a/vendor/github.com/go-macaron/session/session.go b/vendor/github.com/go-macaron/session/session.go index d9bbae2032..97fa56ede6 100644 --- a/vendor/github.com/go-macaron/session/session.go +++ b/vendor/github.com/go-macaron/session/session.go @@ -22,13 +22,12 @@ import ( "fmt" "net/http" "net/url" - "strings" "time" "gopkg.in/macaron.v1" ) -const _VERSION = "0.4.0" +const _VERSION = "0.6.0" func Version() string { return _VERSION @@ -96,6 +95,8 @@ type Options struct { IDLength int // 
Configuration section name. Default is "session". Section string + // Ignore release for websocket. Default is false. + IgnoreReleaseForWebSocket bool } func prepareOptions(options []Options) Options { @@ -138,6 +139,9 @@ func prepareOptions(options []Options) Options { if opt.IDLength == 0 { opt.IDLength = sec.Key("ID_LENGTH").MustInt(16) } + if !opt.IgnoreReleaseForWebSocket { + opt.IgnoreReleaseForWebSocket = sec.Key("IGNORE_RELEASE_FOR_WEBSOCKET").MustBool() + } return opt } @@ -187,6 +191,10 @@ func Sessioner(options ...Options) macaron.Handler { ctx.Next() + if manager.opt.IgnoreReleaseForWebSocket && ctx.Req.Header.Get("Upgrade") == "websocket" { + return + } + if err = sess.Release(); err != nil { panic("session(release): " + err.Error()) } @@ -252,12 +260,30 @@ func (m *Manager) sessionID() string { return hex.EncodeToString(generateRandomKey(m.opt.IDLength / 2)) } +// validSessionID tests whether a provided session ID is a valid session ID. +func (m *Manager) validSessionID(sid string) (bool, error) { + if len(sid) != m.opt.IDLength { + return false, errors.New("invalid 'sid': " + sid) + } + + for i := range sid { + switch { + case '0' <= sid[i] && sid[i] <= '9': + case 'a' <= sid[i] && sid[i] <= 'f': + default: + return false, errors.New("invalid 'sid': " + sid) + } + } + return true, nil +} + // Start starts a session by generating new one // or retrieve existence one by reading session ID from HTTP request if it's valid. func (m *Manager) Start(ctx *macaron.Context) (RawStore, error) { sid := ctx.GetCookie(m.opt.CookieName) - if len(sid) > 0 && m.provider.Exist(sid) { - return m.Read(sid) + valid, _ := m.validSessionID(sid) + if len(sid) > 0 && valid && m.provider.Exist(sid) { + return m.provider.Read(sid) } sid = m.sessionID() @@ -284,10 +310,9 @@ func (m *Manager) Start(ctx *macaron.Context) (RawStore, error) { // Read returns raw session store by session ID. 
func (m *Manager) Read(sid string) (RawStore, error) { - // No slashes or dots "./" should ever occur in the sid and to prevent session file forgery bug. - // See https://github.com/gogs/gogs/issues/5469 - if strings.ContainsAny(sid, "./") { - return nil, errors.New("invalid 'sid': " + sid) + // Ensure we're trying to read a valid session ID + if _, err := m.validSessionID(sid); err != nil { + return nil, err } return m.provider.Read(sid) @@ -300,6 +325,10 @@ func (m *Manager) Destory(ctx *macaron.Context) error { return nil } + if _, err := m.validSessionID(sid); err != nil { + return err + } + if err := m.provider.Destory(sid); err != nil { return err } @@ -318,11 +347,15 @@ func (m *Manager) Destory(ctx *macaron.Context) error { func (m *Manager) RegenerateId(ctx *macaron.Context) (sess RawStore, err error) { sid := m.sessionID() oldsid := ctx.GetCookie(m.opt.CookieName) + _, err = m.validSessionID(oldsid) + if err != nil { + return nil, err + } sess, err = m.provider.Regenerate(oldsid, sid) if err != nil { return nil, err } - ck := &http.Cookie{ + cookie := &http.Cookie{ Name: m.opt.CookieName, Value: sid, Path: m.opt.CookiePath, @@ -331,10 +364,10 @@ func (m *Manager) RegenerateId(ctx *macaron.Context) (sess RawStore, err error) Domain: m.opt.Domain, } if m.opt.CookieLifeTime >= 0 { - ck.MaxAge = m.opt.CookieLifeTime + cookie.MaxAge = m.opt.CookieLifeTime } - http.SetCookie(ctx.Resp, ck) - ctx.Req.AddCookie(ck) + http.SetCookie(ctx.Resp, cookie) + ctx.Req.AddCookie(cookie) return sess, nil } @@ -358,50 +391,3 @@ func (m *Manager) startGC() { func (m *Manager) SetSecure(secure bool) { m.opt.Secure = secure } - -// ___________.____ _____ _________ ___ ___ -// \_ _____/| | / _ \ / _____// | \ -// | __) | | / /_\ \ \_____ \/ ~ \ -// | \ | |___/ | \/ \ Y / -// \___ / |_______ \____|__ /_______ /\___|_ / -// \/ \/ \/ \/ \/ - -type Flash struct { - ctx *macaron.Context - url.Values - ErrorMsg, WarningMsg, InfoMsg, SuccessMsg string -} - -func (f *Flash) set(name, msg 
string, current ...bool) { - isShow := false - if (len(current) == 0 && macaron.FlashNow) || - (len(current) > 0 && current[0]) { - isShow = true - } - - if isShow { - f.ctx.Data["Flash"] = f - } else { - f.Set(name, msg) - } -} - -func (f *Flash) Error(msg string, current ...bool) { - f.ErrorMsg = msg - f.set("error", msg, current...) -} - -func (f *Flash) Warning(msg string, current ...bool) { - f.WarningMsg = msg - f.set("warning", msg, current...) -} - -func (f *Flash) Info(msg string, current ...bool) { - f.InfoMsg = msg - f.set("info", msg, current...) -} - -func (f *Flash) Success(msg string, current ...bool) { - f.SuccessMsg = msg - f.set("success", msg, current...) -} diff --git a/vendor/github.com/go-macaron/session/utils.go b/vendor/github.com/go-macaron/session/utils.go index 07a1283df9..90ca38064b 100644 --- a/vendor/github.com/go-macaron/session/utils.go +++ b/vendor/github.com/go-macaron/session/utils.go @@ -50,11 +50,14 @@ func DecodeGob(encoded []byte) (out map[interface{}]interface{}, err error) { return out, err } +// NOTE: A local copy in case of underlying package change +var alphanum = []byte("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz") + // generateRandomKey creates a random key with the given strength. func generateRandomKey(strength int) []byte { k := make([]byte, strength) if n, err := io.ReadFull(rand.Reader, k); n != strength || err != nil { - return com.RandomCreateBytes(strength) + return com.RandomCreateBytes(strength, alphanum...) } return k } diff --git a/vendor/github.com/lunny/log/LICENSE b/vendor/github.com/lunny/log/LICENSE new file mode 100644 index 0000000000..c9338f8293 --- /dev/null +++ b/vendor/github.com/lunny/log/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2014 - 2016 lunny +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the {organization} nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/vendor/github.com/lunny/log/dbwriter.go b/vendor/github.com/lunny/log/dbwriter.go new file mode 100644 index 0000000000..e8ff00bd89 --- /dev/null +++ b/vendor/github.com/lunny/log/dbwriter.go @@ -0,0 +1,36 @@ +package log + +import ( + "database/sql" + "time" +) + +type DBWriter struct { + db *sql.DB + stmt *sql.Stmt + content chan []byte +} + +func NewDBWriter(db *sql.DB) (*DBWriter, error) { + _, err := db.Exec("CREATE TABLE IF NOT EXISTS log (id int, content text, created datetime)") + if err != nil { + return nil, err + } + stmt, err := db.Prepare("INSERT INTO log (content, created) values (?, ?)") + if err != nil { + return nil, err + } + return &DBWriter{db, stmt, make(chan []byte, 1000)}, nil +} + +func (w *DBWriter) Write(p []byte) (n int, err error) { + _, err = w.stmt.Exec(string(p), time.Now()) + if err == nil { + n = len(p) + } + return +} + +func (w *DBWriter) Close() { + w.stmt.Close() +} diff --git a/vendor/github.com/lunny/log/filewriter.go b/vendor/github.com/lunny/log/filewriter.go new file mode 100644 index 0000000000..f0bb4d1df1 --- /dev/null +++ b/vendor/github.com/lunny/log/filewriter.go @@ -0,0 +1,112 @@ +package log + +import ( + "io" + "os" + "path/filepath" + "sync" + "time" +) + +var _ io.Writer = &Files{} + +type ByType int + +const ( + ByDay ByType = iota + ByHour + ByMonth +) + +var ( + formats = map[ByType]string{ + ByDay: "2006-01-02", + ByHour: "2006-01-02-15", + ByMonth: "2006-01", + } +) + +func SetFileFormat(t ByType, format string) { + formats[t] = format +} + +func (b ByType) Format() string { + return formats[b] +} + +type Files struct { + FileOptions + f *os.File + lastFormat string + lock sync.Mutex +} + +type FileOptions struct { + Dir string + ByType ByType + Loc *time.Location +} + +func prepareFileOption(opts []FileOptions) FileOptions { + var opt FileOptions + if len(opts) > 0 { + opt = opts[0] + } + if opt.Dir == "" { + opt.Dir = "./" + } + err := os.MkdirAll(opt.Dir, os.ModePerm) + if err != nil { + 
panic(err.Error()) + } + + if opt.Loc == nil { + opt.Loc = time.Local + } + return opt +} + +func NewFileWriter(opts ...FileOptions) *Files { + opt := prepareFileOption(opts) + return &Files{ + FileOptions: opt, + } +} + +func (f *Files) getFile() (*os.File, error) { + var err error + t := time.Now().In(f.Loc) + if f.f == nil { + f.lastFormat = t.Format(f.ByType.Format()) + f.f, err = os.OpenFile(filepath.Join(f.Dir, f.lastFormat+".log"), + os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0600) + return f.f, err + } + if f.lastFormat != t.Format(f.ByType.Format()) { + f.f.Close() + f.lastFormat = t.Format(f.ByType.Format()) + f.f, err = os.OpenFile(filepath.Join(f.Dir, f.lastFormat+".log"), + os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0600) + return f.f, err + } + return f.f, nil +} + +func (f *Files) Write(bs []byte) (int, error) { + f.lock.Lock() + defer f.lock.Unlock() + + w, err := f.getFile() + if err != nil { + return 0, err + } + return w.Write(bs) +} + +func (f *Files) Close() { + if f.f != nil { + f.f.Close() + f.f = nil + } + f.lastFormat = "" +} diff --git a/vendor/github.com/lunny/log/logext.go b/vendor/github.com/lunny/log/logext.go new file mode 100644 index 0000000000..215c45f309 --- /dev/null +++ b/vendor/github.com/lunny/log/logext.go @@ -0,0 +1,595 @@ +package log + +import ( + "bytes" + "fmt" + "io" + "os" + "runtime" + "strings" + "sync" + "time" +) + +// These flags define which text to prefix to each log entry generated by the Logger. +const ( + // Bits or'ed together to control what's printed. There is no control over the + // order they appear (the order listed here) or the format they present (as + // described in the comments). A colon appears after these items: + // 2009/0123 01:23:23.123123 /a/b/c/d.go:23: message + Ldate = 1 << iota // the date: 2009/0123 + Ltime // the time: 01:23:23 + Lmicroseconds // microsecond resolution: 01:23:23.123123. assumes Ltime. 
+ Llongfile // full file name and line number: /a/b/c/d.go:23 + Lshortfile // final file name element and line number: d.go:23. overrides Llongfile + Lmodule // module name + Llevel // level: 0(Debug), 1(Info), 2(Warn), 3(Error), 4(Panic), 5(Fatal) + Llongcolor // color will start [info] end of line + Lshortcolor // color only include [info] + LstdFlags = Ldate | Ltime // initial values for the standard logger + //Ldefault = Llevel | LstdFlags | Lshortfile | Llongcolor +) // [prefix][time][level][module][shortfile|longfile] + +func Ldefault() int { + if runtime.GOOS == "windows" { + return Llevel | LstdFlags | Lshortfile + } + return Llevel | LstdFlags | Lshortfile | Llongcolor +} + +func Version() string { + return "0.2.0.1121" +} + +const ( + Lall = iota +) +const ( + Ldebug = iota + Linfo + Lwarn + Lerror + Lpanic + Lfatal + Lnone +) + +const ( + ForeBlack = iota + 30 //30 + ForeRed //31 + ForeGreen //32 + ForeYellow //33 + ForeBlue //34 + ForePurple //35 + ForeCyan //36 + ForeWhite //37 +) + +const ( + BackBlack = iota + 40 //40 + BackRed //41 + BackGreen //42 + BackYellow //43 + BackBlue //44 + BackPurple //45 + BackCyan //46 + BackWhite //47 +) + +var levels = []string{ + "[Debug]", + "[Info]", + "[Warn]", + "[Error]", + "[Panic]", + "[Fatal]", +} + +// MUST called before all logs +func SetLevels(lvs []string) { + levels = lvs +} + +var colors = []int{ + ForeCyan, + ForeGreen, + ForeYellow, + ForeRed, + ForePurple, + ForeBlue, +} + +// MUST called before all logs +func SetColors(cls []int) { + colors = cls +} + +// A Logger represents an active logging object that generates lines of +// output to an io.Writer. Each logging operation makes a single call to +// the Writer's Write method. A Logger can be used simultaneously from +// multiple goroutines; it guarantees to serialize access to the Writer. 
+type Logger struct { + mu sync.Mutex // ensures atomic writes; protects the following fields + prefix string // prefix to write at beginning of each line + flag int // properties + Level int + out io.Writer // destination for output + buf bytes.Buffer // for accumulating text to write + levelStats [6]int64 + loc *time.Location +} + +// New creates a new Logger. The out variable sets the +// destination to which log data will be written. +// The prefix appears at the beginning of each generated log line. +// The flag argument defines the logging properties. +func New(out io.Writer, prefix string, flag int) *Logger { + l := &Logger{out: out, prefix: prefix, Level: 1, flag: flag, loc: time.Local} + if out != os.Stdout { + l.flag = RmColorFlags(l.flag) + } + return l +} + +var Std = New(os.Stderr, "", Ldefault()) + +// Cheap integer to fixed-width decimal ASCII. Give a negative width to avoid zero-padding. +// Knows the buffer has capacity. +func itoa(buf *bytes.Buffer, i int, wid int) { + var u uint = uint(i) + if u == 0 && wid <= 1 { + buf.WriteByte('0') + return + } + + // Assemble decimal in reverse order. + var b [32]byte + bp := len(b) + for ; u > 0 || wid > 0; u /= 10 { + bp-- + wid-- + b[bp] = byte(u%10) + '0' + } + + // avoid slicing b to avoid an allocation. 
+ for bp < len(b) { + buf.WriteByte(b[bp]) + bp++ + } +} + +func moduleOf(file string) string { + pos := strings.LastIndex(file, "/") + if pos != -1 { + pos1 := strings.LastIndex(file[:pos], "/src/") + if pos1 != -1 { + return file[pos1+5 : pos] + } + } + return "UNKNOWN" +} + +func (l *Logger) formatHeader(buf *bytes.Buffer, t time.Time, + file string, line int, lvl int, reqId string) { + if l.prefix != "" { + buf.WriteString(l.prefix) + } + if l.flag&(Ldate|Ltime|Lmicroseconds) != 0 { + if l.flag&Ldate != 0 { + year, month, day := t.Date() + itoa(buf, year, 4) + buf.WriteByte('/') + itoa(buf, int(month), 2) + buf.WriteByte('/') + itoa(buf, day, 2) + buf.WriteByte(' ') + } + if l.flag&(Ltime|Lmicroseconds) != 0 { + hour, min, sec := t.Clock() + itoa(buf, hour, 2) + buf.WriteByte(':') + itoa(buf, min, 2) + buf.WriteByte(':') + itoa(buf, sec, 2) + if l.flag&Lmicroseconds != 0 { + buf.WriteByte('.') + itoa(buf, t.Nanosecond()/1e3, 6) + } + buf.WriteByte(' ') + } + } + if reqId != "" { + buf.WriteByte('[') + buf.WriteString(reqId) + buf.WriteByte(']') + buf.WriteByte(' ') + } + + if l.flag&(Lshortcolor|Llongcolor) != 0 { + buf.WriteString(fmt.Sprintf("\033[1;%dm", colors[lvl])) + } + if l.flag&Llevel != 0 { + buf.WriteString(levels[lvl]) + buf.WriteByte(' ') + } + if l.flag&Lshortcolor != 0 { + buf.WriteString("\033[0m") + } + + if l.flag&Lmodule != 0 { + buf.WriteByte('[') + buf.WriteString(moduleOf(file)) + buf.WriteByte(']') + buf.WriteByte(' ') + } + if l.flag&(Lshortfile|Llongfile) != 0 { + if l.flag&Lshortfile != 0 { + short := file + for i := len(file) - 1; i > 0; i-- { + if file[i] == '/' { + short = file[i+1:] + break + } + } + file = short + } + buf.WriteString(file) + buf.WriteByte(':') + itoa(buf, line, -1) + buf.WriteByte(' ') + } +} + +// Output writes the output for a logging event. The string s contains +// the text to print after the prefix specified by the flags of the +// Logger. 
A newline is appended if the last character of s is not +// already a newline. Calldepth is used to recover the PC and is +// provided for generality, although at the moment on all pre-defined +// paths it will be 2. +func (l *Logger) Output(reqId string, lvl int, calldepth int, s string) error { + if lvl < l.Level { + return nil + } + now := time.Now().In(l.loc) // get this early. + var file string + var line int + l.mu.Lock() + defer l.mu.Unlock() + if l.flag&(Lshortfile|Llongfile|Lmodule) != 0 { + // release lock while getting caller info - it's expensive. + l.mu.Unlock() + var ok bool + _, file, line, ok = runtime.Caller(calldepth) + if !ok { + file = "???" + line = 0 + } + l.mu.Lock() + } + l.levelStats[lvl]++ + l.buf.Reset() + l.formatHeader(&l.buf, now, file, line, lvl, reqId) + l.buf.WriteString(s) + if l.flag&Llongcolor != 0 { + l.buf.WriteString("\033[0m") + } + if len(s) > 0 && s[len(s)-1] != '\n' { + l.buf.WriteByte('\n') + } + _, err := l.out.Write(l.buf.Bytes()) + return err +} + +// ----------------------------------------- + +// Printf calls l.Output to print to the logger. +// Arguments are handled in the manner of fmt.Printf. +func (l *Logger) Printf(format string, v ...interface{}) { + l.Output("", Linfo, 2, fmt.Sprintf(format, v...)) +} + +// Print calls l.Output to print to the logger. +// Arguments are handled in the manner of fmt.Print. +func (l *Logger) Print(v ...interface{}) { + l.Output("", Linfo, 2, fmt.Sprint(v...)) +} + +// Println calls l.Output to print to the logger. +// Arguments are handled in the manner of fmt.Println. 
+func (l *Logger) Println(v ...interface{}) { + l.Output("", Linfo, 2, fmt.Sprintln(v...)) +} + +// ----------------------------------------- + +func (l *Logger) Debugf(format string, v ...interface{}) { + l.Output("", Ldebug, 2, fmt.Sprintf(format, v...)) +} + +func (l *Logger) Debug(v ...interface{}) { + l.Output("", Ldebug, 2, fmt.Sprintln(v...)) +} + +// ----------------------------------------- +func (l *Logger) Infof(format string, v ...interface{}) { + l.Output("", Linfo, 2, fmt.Sprintf(format, v...)) +} + +func (l *Logger) Info(v ...interface{}) { + l.Output("", Linfo, 2, fmt.Sprintln(v...)) +} + +// ----------------------------------------- +func (l *Logger) Warnf(format string, v ...interface{}) { + l.Output("", Lwarn, 2, fmt.Sprintf(format, v...)) +} + +func (l *Logger) Warn(v ...interface{}) { + l.Output("", Lwarn, 2, fmt.Sprintln(v...)) +} + +// ----------------------------------------- + +func (l *Logger) Errorf(format string, v ...interface{}) { + l.Output("", Lerror, 2, fmt.Sprintf(format, v...)) +} + +func (l *Logger) Error(v ...interface{}) { + l.Output("", Lerror, 2, fmt.Sprintln(v...)) +} + +// ----------------------------------------- + +func (l *Logger) Fatal(v ...interface{}) { + l.Output("", Lfatal, 2, fmt.Sprintln(v...)) + os.Exit(1) +} + +// Fatalf is equivalent to l.Printf() followed by a call to os.Exit(1). +func (l *Logger) Fatalf(format string, v ...interface{}) { + l.Output("", Lfatal, 2, fmt.Sprintf(format, v...)) + os.Exit(1) +} + +// ----------------------------------------- +// Panic is equivalent to l.Print() followed by a call to panic(). +func (l *Logger) Panic(v ...interface{}) { + s := fmt.Sprintln(v...) + l.Output("", Lpanic, 2, s) + panic(s) +} + +// Panicf is equivalent to l.Printf() followed by a call to panic(). +func (l *Logger) Panicf(format string, v ...interface{}) { + s := fmt.Sprintf(format, v...) 
+ l.Output("", Lpanic, 2, s) + panic(s) +} + +// ----------------------------------------- +func (l *Logger) Stack(v ...interface{}) { + s := fmt.Sprint(v...) + s += "\n" + buf := make([]byte, 1024*1024) + n := runtime.Stack(buf, true) + s += string(buf[:n]) + s += "\n" + l.Output("", Lerror, 2, s) +} + +// ----------------------------------------- +func (l *Logger) Stat() (stats []int64) { + l.mu.Lock() + v := l.levelStats + l.mu.Unlock() + return v[:] +} + +// Flags returns the output flags for the logger. +func (l *Logger) Flags() int { + l.mu.Lock() + defer l.mu.Unlock() + return l.flag +} + +func RmColorFlags(flag int) int { + // for un std out, it should not show color since almost them don't support + if flag&Llongcolor != 0 { + flag = flag ^ Llongcolor + } + if flag&Lshortcolor != 0 { + flag = flag ^ Lshortcolor + } + return flag +} + +func (l *Logger) Location() *time.Location { + return l.loc +} + +func (l *Logger) SetLocation(loc *time.Location) { + l.loc = loc +} + +// SetFlags sets the output flags for the logger. +func (l *Logger) SetFlags(flag int) { + l.mu.Lock() + defer l.mu.Unlock() + if l.out != os.Stdout { + flag = RmColorFlags(flag) + } + l.flag = flag +} + +// Prefix returns the output prefix for the logger. +func (l *Logger) Prefix() string { + l.mu.Lock() + defer l.mu.Unlock() + return l.prefix +} + +// SetPrefix sets the output prefix for the logger. +func (l *Logger) SetPrefix(prefix string) { + l.mu.Lock() + defer l.mu.Unlock() + l.prefix = prefix +} + +// SetOutputLevel sets the output level for the logger. +func (l *Logger) SetOutputLevel(lvl int) { + l.mu.Lock() + defer l.mu.Unlock() + l.Level = lvl +} + +func (l *Logger) OutputLevel() int { + return l.Level +} + +func (l *Logger) SetOutput(w io.Writer) { + l.mu.Lock() + defer l.mu.Unlock() + l.out = w + if w != os.Stdout { + l.flag = RmColorFlags(l.flag) + } +} + +// SetOutput sets the output destination for the standard logger. 
+func SetOutput(w io.Writer) { + Std.SetOutput(w) +} + +func SetLocation(loc *time.Location) { + Std.SetLocation(loc) +} + +func Location() *time.Location { + return Std.Location() +} + +// Flags returns the output flags for the standard logger. +func Flags() int { + return Std.Flags() +} + +// SetFlags sets the output flags for the standard logger. +func SetFlags(flag int) { + Std.SetFlags(flag) +} + +// Prefix returns the output prefix for the standard logger. +func Prefix() string { + return Std.Prefix() +} + +// SetPrefix sets the output prefix for the standard logger. +func SetPrefix(prefix string) { + Std.SetPrefix(prefix) +} + +func SetOutputLevel(lvl int) { + Std.SetOutputLevel(lvl) +} + +func OutputLevel() int { + return Std.OutputLevel() +} + +// ----------------------------------------- + +// Print calls Output to print to the standard logger. +// Arguments are handled in the manner of fmt.Print. +func Print(v ...interface{}) { + Std.Output("", Linfo, 2, fmt.Sprintln(v...)) +} + +// Printf calls Output to print to the standard logger. +// Arguments are handled in the manner of fmt.Printf. +func Printf(format string, v ...interface{}) { + Std.Output("", Linfo, 2, fmt.Sprintf(format, v...)) +} + +// Println calls Output to print to the standard logger. +// Arguments are handled in the manner of fmt.Println. 
+func Println(v ...interface{}) { + Std.Output("", Linfo, 2, fmt.Sprintln(v...)) +} + +// ----------------------------------------- + +func Debugf(format string, v ...interface{}) { + Std.Output("", Ldebug, 2, fmt.Sprintf(format, v...)) +} + +func Debug(v ...interface{}) { + Std.Output("", Ldebug, 2, fmt.Sprintln(v...)) +} + +// ----------------------------------------- + +func Infof(format string, v ...interface{}) { + Std.Output("", Linfo, 2, fmt.Sprintf(format, v...)) +} + +func Info(v ...interface{}) { + Std.Output("", Linfo, 2, fmt.Sprintln(v...)) +} + +// ----------------------------------------- + +func Warnf(format string, v ...interface{}) { + Std.Output("", Lwarn, 2, fmt.Sprintf(format, v...)) +} + +func Warn(v ...interface{}) { + Std.Output("", Lwarn, 2, fmt.Sprintln(v...)) +} + +// ----------------------------------------- + +func Errorf(format string, v ...interface{}) { + Std.Output("", Lerror, 2, fmt.Sprintf(format, v...)) +} + +func Error(v ...interface{}) { + Std.Output("", Lerror, 2, fmt.Sprintln(v...)) +} + +// ----------------------------------------- + +// Fatal is equivalent to Print() followed by a call to os.Exit(1). +func Fatal(v ...interface{}) { + Std.Output("", Lfatal, 2, fmt.Sprintln(v...)) +} + +// Fatalf is equivalent to Printf() followed by a call to os.Exit(1). +func Fatalf(format string, v ...interface{}) { + Std.Output("", Lfatal, 2, fmt.Sprintf(format, v...)) +} + +// ----------------------------------------- + +// Panic is equivalent to Print() followed by a call to panic(). +func Panic(v ...interface{}) { + Std.Output("", Lpanic, 2, fmt.Sprintln(v...)) +} + +// Panicf is equivalent to Printf() followed by a call to panic(). +func Panicf(format string, v ...interface{}) { + Std.Output("", Lpanic, 2, fmt.Sprintf(format, v...)) +} + +// ----------------------------------------- + +func Stack(v ...interface{}) { + s := fmt.Sprint(v...) 
+ s += "\n" + buf := make([]byte, 1024*1024) + n := runtime.Stack(buf, true) + s += string(buf[:n]) + s += "\n" + Std.Output("", Lerror, 2, s) +} + +// ----------------------------------------- diff --git a/vendor/github.com/lunny/nodb/LICENSE b/vendor/github.com/lunny/nodb/LICENSE new file mode 100644 index 0000000000..7ece9fdf5a --- /dev/null +++ b/vendor/github.com/lunny/nodb/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2014 siddontang + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE.
\ No newline at end of file diff --git a/vendor/github.com/lunny/nodb/batch.go b/vendor/github.com/lunny/nodb/batch.go new file mode 100644 index 0000000000..e69d96a122 --- /dev/null +++ b/vendor/github.com/lunny/nodb/batch.go @@ -0,0 +1,106 @@ +package nodb + +import ( + "sync" + + "github.com/lunny/nodb/store" +) + +type batch struct { + l *Nodb + + store.WriteBatch + + sync.Locker + + logs [][]byte + + tx *Tx +} + +func (b *batch) Commit() error { + b.l.commitLock.Lock() + defer b.l.commitLock.Unlock() + + err := b.WriteBatch.Commit() + + if b.l.binlog != nil { + if err == nil { + if b.tx == nil { + b.l.binlog.Log(b.logs...) + } else { + b.tx.logs = append(b.tx.logs, b.logs...) + } + } + b.logs = [][]byte{} + } + + return err +} + +func (b *batch) Lock() { + b.Locker.Lock() +} + +func (b *batch) Unlock() { + if b.l.binlog != nil { + b.logs = [][]byte{} + } + b.WriteBatch.Rollback() + b.Locker.Unlock() +} + +func (b *batch) Put(key []byte, value []byte) { + if b.l.binlog != nil { + buf := encodeBinLogPut(key, value) + b.logs = append(b.logs, buf) + } + b.WriteBatch.Put(key, value) +} + +func (b *batch) Delete(key []byte) { + if b.l.binlog != nil { + buf := encodeBinLogDelete(key) + b.logs = append(b.logs, buf) + } + b.WriteBatch.Delete(key) +} + +type dbBatchLocker struct { + l *sync.Mutex + wrLock *sync.RWMutex +} + +func (l *dbBatchLocker) Lock() { + l.wrLock.RLock() + l.l.Lock() +} + +func (l *dbBatchLocker) Unlock() { + l.l.Unlock() + l.wrLock.RUnlock() +} + +type txBatchLocker struct { +} + +func (l *txBatchLocker) Lock() {} +func (l *txBatchLocker) Unlock() {} + +type multiBatchLocker struct { +} + +func (l *multiBatchLocker) Lock() {} +func (l *multiBatchLocker) Unlock() {} + +func (l *Nodb) newBatch(wb store.WriteBatch, locker sync.Locker, tx *Tx) *batch { + b := new(batch) + b.l = l + b.WriteBatch = wb + + b.tx = tx + b.Locker = locker + + b.logs = [][]byte{} + return b +} diff --git a/vendor/github.com/lunny/nodb/binlog.go 
b/vendor/github.com/lunny/nodb/binlog.go new file mode 100644 index 0000000000..4c094d9463 --- /dev/null +++ b/vendor/github.com/lunny/nodb/binlog.go @@ -0,0 +1,391 @@ +package nodb + +import ( + "bufio" + "encoding/binary" + "fmt" + "io" + "io/ioutil" + "os" + "path" + "strconv" + "strings" + "sync" + "time" + + "github.com/lunny/log" + "github.com/lunny/nodb/config" +) + +type BinLogHead struct { + CreateTime uint32 + BatchId uint32 + PayloadLen uint32 +} + +func (h *BinLogHead) Len() int { + return 12 +} + +func (h *BinLogHead) Write(w io.Writer) error { + if err := binary.Write(w, binary.BigEndian, h.CreateTime); err != nil { + return err + } + + if err := binary.Write(w, binary.BigEndian, h.BatchId); err != nil { + return err + } + + if err := binary.Write(w, binary.BigEndian, h.PayloadLen); err != nil { + return err + } + + return nil +} + +func (h *BinLogHead) handleReadError(err error) error { + if err == io.EOF { + return io.ErrUnexpectedEOF + } else { + return err + } +} + +func (h *BinLogHead) Read(r io.Reader) error { + var err error + if err = binary.Read(r, binary.BigEndian, &h.CreateTime); err != nil { + return err + } + + if err = binary.Read(r, binary.BigEndian, &h.BatchId); err != nil { + return h.handleReadError(err) + } + + if err = binary.Read(r, binary.BigEndian, &h.PayloadLen); err != nil { + return h.handleReadError(err) + } + + return nil +} + +func (h *BinLogHead) InSameBatch(ho *BinLogHead) bool { + if h.CreateTime == ho.CreateTime && h.BatchId == ho.BatchId { + return true + } else { + return false + } +} + +/* +index file format: +ledis-bin.00001 +ledis-bin.00002 +ledis-bin.00003 + +log file format + +Log: Head|PayloadData + +Head: createTime|batchId|payloadData + +*/ + +type BinLog struct { + sync.Mutex + + path string + + cfg *config.BinLogConfig + + logFile *os.File + + logWb *bufio.Writer + + indexName string + logNames []string + lastLogIndex int64 + + batchId uint32 + + ch chan struct{} +} + +func NewBinLog(cfg *config.Config) 
(*BinLog, error) { + l := new(BinLog) + + l.cfg = &cfg.BinLog + l.cfg.Adjust() + + l.path = path.Join(cfg.DataDir, "binlog") + + if err := os.MkdirAll(l.path, os.ModePerm); err != nil { + return nil, err + } + + l.logNames = make([]string, 0, 16) + + l.ch = make(chan struct{}) + + if err := l.loadIndex(); err != nil { + return nil, err + } + + return l, nil +} + +func (l *BinLog) flushIndex() error { + data := strings.Join(l.logNames, "\n") + + bakName := fmt.Sprintf("%s.bak", l.indexName) + f, err := os.OpenFile(bakName, os.O_WRONLY|os.O_CREATE, 0666) + if err != nil { + log.Error("create binlog bak index error %s", err.Error()) + return err + } + + if _, err := f.WriteString(data); err != nil { + log.Error("write binlog index error %s", err.Error()) + f.Close() + return err + } + + f.Close() + + if err := os.Rename(bakName, l.indexName); err != nil { + log.Error("rename binlog bak index error %s", err.Error()) + return err + } + + return nil +} + +func (l *BinLog) loadIndex() error { + l.indexName = path.Join(l.path, fmt.Sprintf("ledis-bin.index")) + if _, err := os.Stat(l.indexName); os.IsNotExist(err) { + //no index file, nothing to do + } else { + indexData, err := ioutil.ReadFile(l.indexName) + if err != nil { + return err + } + + lines := strings.Split(string(indexData), "\n") + for _, line := range lines { + line = strings.Trim(line, "\r\n ") + if len(line) == 0 { + continue + } + + if _, err := os.Stat(path.Join(l.path, line)); err != nil { + log.Error("load index line %s error %s", line, err.Error()) + return err + } else { + l.logNames = append(l.logNames, line) + } + } + } + if l.cfg.MaxFileNum > 0 && len(l.logNames) > l.cfg.MaxFileNum { + //remove oldest logfile + if err := l.Purge(len(l.logNames) - l.cfg.MaxFileNum); err != nil { + return err + } + } + + var err error + if len(l.logNames) == 0 { + l.lastLogIndex = 1 + } else { + lastName := l.logNames[len(l.logNames)-1] + + if l.lastLogIndex, err = strconv.ParseInt(path.Ext(lastName)[1:], 10, 64); err 
!= nil { + log.Error("invalid logfile name %s", err.Error()) + return err + } + + //like mysql, if server restart, a new binlog will create + l.lastLogIndex++ + } + + return nil +} + +func (l *BinLog) getLogFile() string { + return l.FormatLogFileName(l.lastLogIndex) +} + +func (l *BinLog) openNewLogFile() error { + var err error + lastName := l.getLogFile() + + logPath := path.Join(l.path, lastName) + if l.logFile, err = os.OpenFile(logPath, os.O_CREATE|os.O_WRONLY, 0666); err != nil { + log.Error("open new logfile error %s", err.Error()) + return err + } + + if l.cfg.MaxFileNum > 0 && len(l.logNames) == l.cfg.MaxFileNum { + l.purge(1) + } + + l.logNames = append(l.logNames, lastName) + + if l.logWb == nil { + l.logWb = bufio.NewWriterSize(l.logFile, 1024) + } else { + l.logWb.Reset(l.logFile) + } + + if err = l.flushIndex(); err != nil { + return err + } + + return nil +} + +func (l *BinLog) checkLogFileSize() bool { + if l.logFile == nil { + return false + } + + st, _ := l.logFile.Stat() + if st.Size() >= int64(l.cfg.MaxFileSize) { + l.closeLog() + return true + } + + return false +} + +func (l *BinLog) closeLog() { + l.lastLogIndex++ + + l.logFile.Close() + l.logFile = nil +} + +func (l *BinLog) purge(n int) { + for i := 0; i < n; i++ { + logPath := path.Join(l.path, l.logNames[i]) + os.Remove(logPath) + } + + copy(l.logNames[0:], l.logNames[n:]) + l.logNames = l.logNames[0 : len(l.logNames)-n] +} + +func (l *BinLog) Close() { + if l.logFile != nil { + l.logFile.Close() + l.logFile = nil + } +} + +func (l *BinLog) LogNames() []string { + return l.logNames +} + +func (l *BinLog) LogFileName() string { + return l.getLogFile() +} + +func (l *BinLog) LogFilePos() int64 { + if l.logFile == nil { + return 0 + } else { + st, _ := l.logFile.Stat() + return st.Size() + } +} + +func (l *BinLog) LogFileIndex() int64 { + return l.lastLogIndex +} + +func (l *BinLog) FormatLogFileName(index int64) string { + return fmt.Sprintf("ledis-bin.%07d", index) +} + +func (l *BinLog) 
FormatLogFilePath(index int64) string { + return path.Join(l.path, l.FormatLogFileName(index)) +} + +func (l *BinLog) LogPath() string { + return l.path +} + +func (l *BinLog) Purge(n int) error { + l.Lock() + defer l.Unlock() + + if len(l.logNames) == 0 { + return nil + } + + if n >= len(l.logNames) { + n = len(l.logNames) + //can not purge current log file + if l.logNames[n-1] == l.getLogFile() { + n = n - 1 + } + } + + l.purge(n) + + return l.flushIndex() +} + +func (l *BinLog) PurgeAll() error { + l.Lock() + defer l.Unlock() + + l.closeLog() + return l.openNewLogFile() +} + +func (l *BinLog) Log(args ...[]byte) error { + l.Lock() + defer l.Unlock() + + var err error + + if l.logFile == nil { + if err = l.openNewLogFile(); err != nil { + return err + } + } + + head := &BinLogHead{} + + head.CreateTime = uint32(time.Now().Unix()) + head.BatchId = l.batchId + + l.batchId++ + + for _, data := range args { + head.PayloadLen = uint32(len(data)) + + if err := head.Write(l.logWb); err != nil { + return err + } + + if _, err := l.logWb.Write(data); err != nil { + return err + } + } + + if err = l.logWb.Flush(); err != nil { + log.Error("write log error %s", err.Error()) + return err + } + + l.checkLogFileSize() + + close(l.ch) + l.ch = make(chan struct{}) + + return nil +} + +func (l *BinLog) Wait() <-chan struct{} { + return l.ch +} diff --git a/vendor/github.com/lunny/nodb/binlog_util.go b/vendor/github.com/lunny/nodb/binlog_util.go new file mode 100644 index 0000000000..22124dda07 --- /dev/null +++ b/vendor/github.com/lunny/nodb/binlog_util.go @@ -0,0 +1,215 @@ +package nodb + +import ( + "encoding/binary" + "errors" + "fmt" + "strconv" +) + +var ( + errBinLogDeleteType = errors.New("invalid bin log delete type") + errBinLogPutType = errors.New("invalid bin log put type") + errBinLogCommandType = errors.New("invalid bin log command type") +) + +func encodeBinLogDelete(key []byte) []byte { + buf := make([]byte, 1+len(key)) + buf[0] = BinLogTypeDeletion + copy(buf[1:], 
key) + return buf +} + +func decodeBinLogDelete(sz []byte) ([]byte, error) { + if len(sz) < 1 || sz[0] != BinLogTypeDeletion { + return nil, errBinLogDeleteType + } + + return sz[1:], nil +} + +func encodeBinLogPut(key []byte, value []byte) []byte { + buf := make([]byte, 3+len(key)+len(value)) + buf[0] = BinLogTypePut + pos := 1 + binary.BigEndian.PutUint16(buf[pos:], uint16(len(key))) + pos += 2 + copy(buf[pos:], key) + pos += len(key) + copy(buf[pos:], value) + + return buf +} + +func decodeBinLogPut(sz []byte) ([]byte, []byte, error) { + if len(sz) < 3 || sz[0] != BinLogTypePut { + return nil, nil, errBinLogPutType + } + + keyLen := int(binary.BigEndian.Uint16(sz[1:])) + if 3+keyLen > len(sz) { + return nil, nil, errBinLogPutType + } + + return sz[3 : 3+keyLen], sz[3+keyLen:], nil +} + +func FormatBinLogEvent(event []byte) (string, error) { + logType := uint8(event[0]) + + var err error + var k []byte + var v []byte + + var buf []byte = make([]byte, 0, 1024) + + switch logType { + case BinLogTypePut: + k, v, err = decodeBinLogPut(event) + buf = append(buf, "PUT "...) + case BinLogTypeDeletion: + k, err = decodeBinLogDelete(event) + buf = append(buf, "DELETE "...) + default: + err = errInvalidBinLogEvent + } + + if err != nil { + return "", err + } + + if buf, err = formatDataKey(buf, k); err != nil { + return "", err + } + + if v != nil && len(v) != 0 { + buf = append(buf, fmt.Sprintf(" %q", v)...) + } + + return String(buf), nil +} + +func formatDataKey(buf []byte, k []byte) ([]byte, error) { + if len(k) < 2 { + return nil, errInvalidBinLogEvent + } + + buf = append(buf, fmt.Sprintf("DB:%2d ", k[0])...) + buf = append(buf, fmt.Sprintf("%s ", TypeName[k[1]])...) 
+ + db := new(DB) + db.index = k[0] + + //to do format at respective place + + switch k[1] { + case KVType: + if key, err := db.decodeKVKey(k); err != nil { + return nil, err + } else { + buf = strconv.AppendQuote(buf, String(key)) + } + case HashType: + if key, field, err := db.hDecodeHashKey(k); err != nil { + return nil, err + } else { + buf = strconv.AppendQuote(buf, String(key)) + buf = append(buf, ' ') + buf = strconv.AppendQuote(buf, String(field)) + } + case HSizeType: + if key, err := db.hDecodeSizeKey(k); err != nil { + return nil, err + } else { + buf = strconv.AppendQuote(buf, String(key)) + } + case ListType: + if key, seq, err := db.lDecodeListKey(k); err != nil { + return nil, err + } else { + buf = strconv.AppendQuote(buf, String(key)) + buf = append(buf, ' ') + buf = strconv.AppendInt(buf, int64(seq), 10) + } + case LMetaType: + if key, err := db.lDecodeMetaKey(k); err != nil { + return nil, err + } else { + buf = strconv.AppendQuote(buf, String(key)) + } + case ZSetType: + if key, m, err := db.zDecodeSetKey(k); err != nil { + return nil, err + } else { + buf = strconv.AppendQuote(buf, String(key)) + buf = append(buf, ' ') + buf = strconv.AppendQuote(buf, String(m)) + } + case ZSizeType: + if key, err := db.zDecodeSizeKey(k); err != nil { + return nil, err + } else { + buf = strconv.AppendQuote(buf, String(key)) + } + case ZScoreType: + if key, m, score, err := db.zDecodeScoreKey(k); err != nil { + return nil, err + } else { + buf = strconv.AppendQuote(buf, String(key)) + buf = append(buf, ' ') + buf = strconv.AppendQuote(buf, String(m)) + buf = append(buf, ' ') + buf = strconv.AppendInt(buf, score, 10) + } + case BitType: + if key, seq, err := db.bDecodeBinKey(k); err != nil { + return nil, err + } else { + buf = strconv.AppendQuote(buf, String(key)) + buf = append(buf, ' ') + buf = strconv.AppendUint(buf, uint64(seq), 10) + } + case BitMetaType: + if key, err := db.bDecodeMetaKey(k); err != nil { + return nil, err + } else { + buf = 
// Limits and defaults for binlog rotation, applied by BinLogConfig.Adjust.
const (
	// MaxBinLogFileSize is the upper bound Adjust clamps MaxFileSize to
	// (1024 * 1024 * 1024).
	MaxBinLogFileSize int = 1024 * 1024 * 1024
	// MaxBinLogFileNum is the upper bound Adjust clamps MaxFileNum to.
	MaxBinLogFileNum int = 10000

	// DefaultBinLogFileSize replaces a non-positive MaxFileSize in Adjust.
	DefaultBinLogFileSize int = MaxBinLogFileSize
	// DefaultBinLogFileNum replaces a non-positive MaxFileNum in Adjust.
	DefaultBinLogFileNum int = 10
)
`toml:"nosync"` +} + +type BinLogConfig struct { + MaxFileSize int `toml:"max_file_size"` + MaxFileNum int `toml:"max_file_num"` +} + +type Config struct { + DataDir string `toml:"data_dir"` + + DBName string `toml:"db_name"` + + LevelDB LevelDBConfig `toml:"leveldb"` + + LMDB LMDBConfig `toml:"lmdb"` + + BinLog BinLogConfig `toml:"binlog"` + + SlaveOf string `toml:"slaveof"` + + AccessLog string `toml:"access_log"` +} + +func NewConfigWithFile(fileName string) (*Config, error) { + data, err := ioutil.ReadFile(fileName) + if err != nil { + return nil, err + } + + return NewConfigWithData(data) +} + +func NewConfigWithData(data []byte) (*Config, error) { + cfg := NewConfigDefault() + + _, err := toml.Decode(string(data), cfg) + if err != nil { + return nil, err + } + + return cfg, nil +} + +func NewConfigDefault() *Config { + cfg := new(Config) + + cfg.DataDir = DefaultDataDir + + cfg.DBName = DefaultDBName + + // disable binlog + cfg.BinLog.MaxFileNum = 0 + cfg.BinLog.MaxFileSize = 0 + + // disable replication + cfg.SlaveOf = "" + + // disable access log + cfg.AccessLog = "" + + cfg.LMDB.MapSize = 20 * 1024 * 1024 + cfg.LMDB.NoSync = true + + return cfg +} + +func (cfg *LevelDBConfig) Adjust() { + if cfg.CacheSize <= 0 { + cfg.CacheSize = 4 * 1024 * 1024 + } + + if cfg.BlockSize <= 0 { + cfg.BlockSize = 4 * 1024 + } + + if cfg.WriteBufferSize <= 0 { + cfg.WriteBufferSize = 4 * 1024 * 1024 + } + + if cfg.MaxOpenFiles < 1024 { + cfg.MaxOpenFiles = 1024 + } +} + +func (cfg *BinLogConfig) Adjust() { + if cfg.MaxFileSize <= 0 { + cfg.MaxFileSize = DefaultBinLogFileSize + } else if cfg.MaxFileSize > MaxBinLogFileSize { + cfg.MaxFileSize = MaxBinLogFileSize + } + + if cfg.MaxFileNum <= 0 { + cfg.MaxFileNum = DefaultBinLogFileNum + } else if cfg.MaxFileNum > MaxBinLogFileNum { + cfg.MaxFileNum = MaxBinLogFileNum + } +} diff --git a/vendor/github.com/lunny/nodb/const.go b/vendor/github.com/lunny/nodb/const.go new file mode 100644 index 0000000000..446dae634e --- /dev/null 
// Type tags for encoded keys. An encoded key starts with the DB index byte
// followed by one of these tags (decodeMetaKey and formatDataKey both read the
// tag from the second byte). TypeName maps the tags to printable names.
const (
	NoneType byte = 0
	KVType   byte = 1
	// Each container type below is paired with a meta/size tag; flushType
	// scans the meta tag (LMetaType, HSizeType, ZSizeType, BitMetaType,
	// SSizeType) to enumerate the keys of the matching data type.
	HashType    byte = 2
	HSizeType   byte = 3
	ListType    byte = 4
	LMetaType   byte = 5
	ZSetType    byte = 6
	ZSizeType   byte = 7
	ZScoreType  byte = 8
	BitType     byte = 9
	BitMetaType byte = 10
	SetType     byte = 11
	SSizeType   byte = 12

	// NOTE(review): presumably the upper bound reserved for regular data
	// type tags -- confirm against its users, which are outside this file.
	maxDataType byte = 100

	// Tags of the expiration bookkeeping keys; formatDataKey decodes them with
	// expDecodeTimeKey and expDecodeMetaKey respectively.
	ExpTimeType byte = 101
	ExpMetaType byte = 102
)
b/vendor/github.com/lunny/nodb/doc.go @@ -0,0 +1,61 @@ +// Package nodb is a high performance embedded NoSQL database. +// +// nodb supports various data structures such as kv, list, hash and zset, like redis. +// +// Other features include binlog replication, data with a limited time-to-live. +// +// Usage +// +// First create a nodb instance before use: +// +// l, err := nodb.Open(cfg) +// +// cfg is a Config instance which contains configuration for nodb use, +// like DataDir (root directory for nodb working to store data). +// +// After you create a nodb instance, you can select a DB to store your data: +// +// db, _ := l.Select(0) +// +// DB must be selected by an index, nodb supports only 16 databases, so the index range is [0-15]. +// +// KV +// +// KV is the most basic nodb type like any other key-value database. +// +// err := db.Set(key, value) +// value, err := db.Get(key) +// +// List +// +// A List is simply a list of values, sorted by insertion order. +// You can push or pop a value on the list head (left) or tail (right). +// +// err := db.LPush(key, value1) +// err := db.RPush(key, value2) +// value1, err := db.LPop(key) +// value2, err := db.RPop(key) +// +// Hash +// +// Hash is a map between fields and values. +// +// n, err := db.HSet(key, field1, value1) +// n, err := db.HSet(key, field2, value2) +// value1, err := db.HGet(key, field1) +// value2, err := db.HGet(key, field2) +// +// ZSet +// +// ZSet is a sorted collection of values. +// Every member of a zset is associated with a score, an int64 value which is used to sort members from smallest to greatest score. +// Members are unique, but scores may be the same. +// +// n, err := db.ZAdd(key, ScorePair{score1, member1}, ScorePair{score2, member2}) +// ay, err := db.ZRangeByScore(key, minScore, maxScore, 0, -1) +// +// Binlog +// +// nodb supports binlog, so you can sync binlog to another server for replication. If you want to open binlog support, set both BinLog.MaxFileNum and BinLog.MaxFileSize to positive values in config. 
+// +package nodb diff --git a/vendor/github.com/lunny/nodb/dump.go b/vendor/github.com/lunny/nodb/dump.go new file mode 100644 index 0000000000..3c9722e00d --- /dev/null +++ b/vendor/github.com/lunny/nodb/dump.go @@ -0,0 +1,200 @@ +package nodb + +import ( + "bufio" + "bytes" + "encoding/binary" + "io" + "os" + + "github.com/siddontang/go-snappy/snappy" +) + +//dump format +// fileIndex(bigendian int64)|filePos(bigendian int64) +// |keylen(bigendian int32)|key|valuelen(bigendian int32)|value...... +// +//key and value are both compressed for fast transfer dump on network using snappy + +type BinLogAnchor struct { + LogFileIndex int64 + LogPos int64 +} + +func (m *BinLogAnchor) WriteTo(w io.Writer) error { + if err := binary.Write(w, binary.BigEndian, m.LogFileIndex); err != nil { + return err + } + + if err := binary.Write(w, binary.BigEndian, m.LogPos); err != nil { + return err + } + return nil +} + +func (m *BinLogAnchor) ReadFrom(r io.Reader) error { + err := binary.Read(r, binary.BigEndian, &m.LogFileIndex) + if err != nil { + return err + } + + err = binary.Read(r, binary.BigEndian, &m.LogPos) + if err != nil { + return err + } + + return nil +} + +func (l *Nodb) DumpFile(path string) error { + f, err := os.Create(path) + if err != nil { + return err + } + defer f.Close() + + return l.Dump(f) +} + +func (l *Nodb) Dump(w io.Writer) error { + m := new(BinLogAnchor) + + var err error + + l.wLock.Lock() + defer l.wLock.Unlock() + + if l.binlog != nil { + m.LogFileIndex = l.binlog.LogFileIndex() + m.LogPos = l.binlog.LogFilePos() + } + + wb := bufio.NewWriterSize(w, 4096) + if err = m.WriteTo(wb); err != nil { + return err + } + + it := l.ldb.NewIterator() + it.SeekToFirst() + + compressBuf := make([]byte, 4096) + + var key []byte + var value []byte + for ; it.Valid(); it.Next() { + key = it.RawKey() + value = it.RawValue() + + if key, err = snappy.Encode(compressBuf, key); err != nil { + return err + } + + if err = binary.Write(wb, binary.BigEndian, 
uint16(len(key))); err != nil { + return err + } + + if _, err = wb.Write(key); err != nil { + return err + } + + if value, err = snappy.Encode(compressBuf, value); err != nil { + return err + } + + if err = binary.Write(wb, binary.BigEndian, uint32(len(value))); err != nil { + return err + } + + if _, err = wb.Write(value); err != nil { + return err + } + } + + if err = wb.Flush(); err != nil { + return err + } + + compressBuf = nil + + return nil +} + +func (l *Nodb) LoadDumpFile(path string) (*BinLogAnchor, error) { + f, err := os.Open(path) + if err != nil { + return nil, err + } + defer f.Close() + + return l.LoadDump(f) +} + +func (l *Nodb) LoadDump(r io.Reader) (*BinLogAnchor, error) { + l.wLock.Lock() + defer l.wLock.Unlock() + + info := new(BinLogAnchor) + + rb := bufio.NewReaderSize(r, 4096) + + err := info.ReadFrom(rb) + if err != nil { + return nil, err + } + + var keyLen uint16 + var valueLen uint32 + + var keyBuf bytes.Buffer + var valueBuf bytes.Buffer + + deKeyBuf := make([]byte, 4096) + deValueBuf := make([]byte, 4096) + + var key, value []byte + + for { + if err = binary.Read(rb, binary.BigEndian, &keyLen); err != nil && err != io.EOF { + return nil, err + } else if err == io.EOF { + break + } + + if _, err = io.CopyN(&keyBuf, rb, int64(keyLen)); err != nil { + return nil, err + } + + if key, err = snappy.Decode(deKeyBuf, keyBuf.Bytes()); err != nil { + return nil, err + } + + if err = binary.Read(rb, binary.BigEndian, &valueLen); err != nil { + return nil, err + } + + if _, err = io.CopyN(&valueBuf, rb, int64(valueLen)); err != nil { + return nil, err + } + + if value, err = snappy.Decode(deValueBuf, valueBuf.Bytes()); err != nil { + return nil, err + } + + if err = l.ldb.Put(key, value); err != nil { + return nil, err + } + + keyBuf.Reset() + valueBuf.Reset() + } + + deKeyBuf = nil + deValueBuf = nil + + //if binlog enable, we will delete all binlogs and open a new one for handling simply + if l.binlog != nil { + l.binlog.PurgeAll() + } + + 
return info, nil +} diff --git a/vendor/github.com/lunny/nodb/info.go b/vendor/github.com/lunny/nodb/info.go new file mode 100644 index 0000000000..3fd37e3d44 --- /dev/null +++ b/vendor/github.com/lunny/nodb/info.go @@ -0,0 +1,24 @@ +package nodb + +// todo, add info + +// type Keyspace struct { +// Kvs int `json:"kvs"` +// KvExpires int `json:"kv_expires"` + +// Lists int `json:"lists"` +// ListExpires int `json:"list_expires"` + +// Bitmaps int `json:"bitmaps"` +// BitmapExpires int `json:"bitmap_expires"` + +// ZSets int `json:"zsets"` +// ZSetExpires int `json:"zset_expires"` + +// Hashes int `json:"hashes"` +// HashExpires int `json:"hahsh_expires"` +// } + +// type Info struct { +// KeySpaces [MaxDBNumber]Keyspace +// } diff --git a/vendor/github.com/lunny/nodb/multi.go b/vendor/github.com/lunny/nodb/multi.go new file mode 100644 index 0000000000..ca581ce9a2 --- /dev/null +++ b/vendor/github.com/lunny/nodb/multi.go @@ -0,0 +1,73 @@ +package nodb + +import ( + "errors" + "fmt" +) + +var ( + ErrNestMulti = errors.New("nest multi not supported") + ErrMultiDone = errors.New("multi has been closed") +) + +type Multi struct { + *DB +} + +func (db *DB) IsInMulti() bool { + return db.status == DBInMulti +} + +// begin a mutli to execute commands, +// it will block any other write operations before you close the multi, unlike transaction, mutli can not rollback +func (db *DB) Multi() (*Multi, error) { + if db.IsInMulti() { + return nil, ErrNestMulti + } + + m := new(Multi) + + m.DB = new(DB) + m.DB.status = DBInMulti + + m.DB.l = db.l + + m.l.wLock.Lock() + + m.DB.sdb = db.sdb + + m.DB.bucket = db.sdb + + m.DB.index = db.index + + m.DB.kvBatch = m.newBatch() + m.DB.listBatch = m.newBatch() + m.DB.hashBatch = m.newBatch() + m.DB.zsetBatch = m.newBatch() + m.DB.binBatch = m.newBatch() + m.DB.setBatch = m.newBatch() + + return m, nil +} + +func (m *Multi) newBatch() *batch { + return m.l.newBatch(m.bucket.NewWriteBatch(), &multiBatchLocker{}, nil) +} + +func (m *Multi) 
Close() error { + if m.bucket == nil { + return ErrMultiDone + } + m.l.wLock.Unlock() + m.bucket = nil + return nil +} + +func (m *Multi) Select(index int) error { + if index < 0 || index >= int(MaxDBNumber) { + return fmt.Errorf("invalid db index %d", index) + } + + m.DB.index = uint8(index) + return nil +} diff --git a/vendor/github.com/lunny/nodb/nodb.go b/vendor/github.com/lunny/nodb/nodb.go new file mode 100644 index 0000000000..fdd0272c94 --- /dev/null +++ b/vendor/github.com/lunny/nodb/nodb.go @@ -0,0 +1,128 @@ +package nodb + +import ( + "fmt" + "sync" + "time" + + "github.com/lunny/log" + "github.com/lunny/nodb/config" + "github.com/lunny/nodb/store" +) + +type Nodb struct { + cfg *config.Config + + ldb *store.DB + dbs [MaxDBNumber]*DB + + quit chan struct{} + jobs *sync.WaitGroup + + binlog *BinLog + + wLock sync.RWMutex //allow one write at same time + commitLock sync.Mutex //allow one write commit at same time +} + +func Open(cfg *config.Config) (*Nodb, error) { + if len(cfg.DataDir) == 0 { + cfg.DataDir = config.DefaultDataDir + } + + ldb, err := store.Open(cfg) + if err != nil { + return nil, err + } + + l := new(Nodb) + + l.quit = make(chan struct{}) + l.jobs = new(sync.WaitGroup) + + l.ldb = ldb + + if cfg.BinLog.MaxFileNum > 0 && cfg.BinLog.MaxFileSize > 0 { + l.binlog, err = NewBinLog(cfg) + if err != nil { + return nil, err + } + } else { + l.binlog = nil + } + + for i := uint8(0); i < MaxDBNumber; i++ { + l.dbs[i] = l.newDB(i) + } + + l.activeExpireCycle() + + return l, nil +} + +func (l *Nodb) Close() { + close(l.quit) + l.jobs.Wait() + + l.ldb.Close() + + if l.binlog != nil { + l.binlog.Close() + l.binlog = nil + } +} + +func (l *Nodb) Select(index int) (*DB, error) { + if index < 0 || index >= int(MaxDBNumber) { + return nil, fmt.Errorf("invalid db index %d", index) + } + + return l.dbs[index], nil +} + +func (l *Nodb) FlushAll() error { + for index, db := range l.dbs { + if _, err := db.FlushAll(); err != nil { + log.Error("flush db %d error 
%s", index, err.Error()) + } + } + + return nil +} + +// very dangerous to use +func (l *Nodb) DataDB() *store.DB { + return l.ldb +} + +func (l *Nodb) activeExpireCycle() { + var executors []*elimination = make([]*elimination, len(l.dbs)) + for i, db := range l.dbs { + executors[i] = db.newEliminator() + } + + l.jobs.Add(1) + go func() { + tick := time.NewTicker(1 * time.Second) + end := false + done := make(chan struct{}) + for !end { + select { + case <-tick.C: + go func() { + for _, eli := range executors { + eli.active() + } + done <- struct{}{} + }() + <-done + case <-l.quit: + end = true + break + } + } + + tick.Stop() + l.jobs.Done() + }() +} diff --git a/vendor/github.com/lunny/nodb/nodb_db.go b/vendor/github.com/lunny/nodb/nodb_db.go new file mode 100644 index 0000000000..f68ebaa0d4 --- /dev/null +++ b/vendor/github.com/lunny/nodb/nodb_db.go @@ -0,0 +1,171 @@ +package nodb + +import ( + "fmt" + "sync" + + "github.com/lunny/nodb/store" +) + +type ibucket interface { + Get(key []byte) ([]byte, error) + + Put(key []byte, value []byte) error + Delete(key []byte) error + + NewIterator() *store.Iterator + + NewWriteBatch() store.WriteBatch + + RangeIterator(min []byte, max []byte, rangeType uint8) *store.RangeLimitIterator + RevRangeIterator(min []byte, max []byte, rangeType uint8) *store.RangeLimitIterator + RangeLimitIterator(min []byte, max []byte, rangeType uint8, offset int, count int) *store.RangeLimitIterator + RevRangeLimitIterator(min []byte, max []byte, rangeType uint8, offset int, count int) *store.RangeLimitIterator +} + +type DB struct { + l *Nodb + + sdb *store.DB + + bucket ibucket + + index uint8 + + kvBatch *batch + listBatch *batch + hashBatch *batch + zsetBatch *batch + binBatch *batch + setBatch *batch + + status uint8 +} + +func (l *Nodb) newDB(index uint8) *DB { + d := new(DB) + + d.l = l + + d.sdb = l.ldb + + d.bucket = d.sdb + + d.status = DBAutoCommit + d.index = index + + d.kvBatch = d.newBatch() + d.listBatch = d.newBatch() + 
d.hashBatch = d.newBatch() + d.zsetBatch = d.newBatch() + d.binBatch = d.newBatch() + d.setBatch = d.newBatch() + + return d +} + +func (db *DB) newBatch() *batch { + return db.l.newBatch(db.bucket.NewWriteBatch(), &dbBatchLocker{l: &sync.Mutex{}, wrLock: &db.l.wLock}, nil) +} + +func (db *DB) Index() int { + return int(db.index) +} + +func (db *DB) IsAutoCommit() bool { + return db.status == DBAutoCommit +} + +func (db *DB) FlushAll() (drop int64, err error) { + all := [...](func() (int64, error)){ + db.flush, + db.lFlush, + db.hFlush, + db.zFlush, + db.bFlush, + db.sFlush} + + for _, flush := range all { + if n, e := flush(); e != nil { + err = e + return + } else { + drop += n + } + } + + return +} + +func (db *DB) newEliminator() *elimination { + eliminator := newEliminator(db) + + eliminator.regRetireContext(KVType, db.kvBatch, db.delete) + eliminator.regRetireContext(ListType, db.listBatch, db.lDelete) + eliminator.regRetireContext(HashType, db.hashBatch, db.hDelete) + eliminator.regRetireContext(ZSetType, db.zsetBatch, db.zDelete) + eliminator.regRetireContext(BitType, db.binBatch, db.bDelete) + eliminator.regRetireContext(SetType, db.setBatch, db.sDelete) + + return eliminator +} + +func (db *DB) flushRegion(t *batch, minKey []byte, maxKey []byte) (drop int64, err error) { + it := db.bucket.RangeIterator(minKey, maxKey, store.RangeROpen) + for ; it.Valid(); it.Next() { + t.Delete(it.RawKey()) + drop++ + if drop&1023 == 0 { + if err = t.Commit(); err != nil { + return + } + } + } + it.Close() + return +} + +func (db *DB) flushType(t *batch, dataType byte) (drop int64, err error) { + var deleteFunc func(t *batch, key []byte) int64 + var metaDataType byte + switch dataType { + case KVType: + deleteFunc = db.delete + metaDataType = KVType + case ListType: + deleteFunc = db.lDelete + metaDataType = LMetaType + case HashType: + deleteFunc = db.hDelete + metaDataType = HSizeType + case ZSetType: + deleteFunc = db.zDelete + metaDataType = ZSizeType + case BitType: 
+ deleteFunc = db.bDelete + metaDataType = BitMetaType + case SetType: + deleteFunc = db.sDelete + metaDataType = SSizeType + default: + return 0, fmt.Errorf("invalid data type: %s", TypeName[dataType]) + } + + var keys [][]byte + keys, err = db.scan(metaDataType, nil, 1024, false, "") + for len(keys) != 0 || err != nil { + for _, key := range keys { + deleteFunc(t, key) + db.rmExpire(t, dataType, key) + + } + + if err = t.Commit(); err != nil { + return + } else { + drop += int64(len(keys)) + } + keys, err = db.scan(metaDataType, nil, 1024, false, "") + } + return +} diff --git a/vendor/github.com/lunny/nodb/replication.go b/vendor/github.com/lunny/nodb/replication.go new file mode 100644 index 0000000000..f9bc951085 --- /dev/null +++ b/vendor/github.com/lunny/nodb/replication.go @@ -0,0 +1,312 @@ +package nodb + +import ( + "bufio" + "bytes" + "errors" + "io" + "os" + "time" + + "github.com/lunny/log" + "github.com/lunny/nodb/store/driver" +) + +const ( + maxReplBatchNum = 100 + maxReplLogSize = 1 * 1024 * 1024 +) + +var ( + ErrSkipEvent = errors.New("skip to next event") +) + +var ( + errInvalidBinLogEvent = errors.New("invalid binglog event") + errInvalidBinLogFile = errors.New("invalid binlog file") +) + +type replBatch struct { + wb driver.IWriteBatch + events [][]byte + l *Nodb + + lastHead *BinLogHead +} + +func (b *replBatch) Commit() error { + b.l.commitLock.Lock() + defer b.l.commitLock.Unlock() + + err := b.wb.Commit() + if err != nil { + b.Rollback() + return err + } + + if b.l.binlog != nil { + if err = b.l.binlog.Log(b.events...); err != nil { + b.Rollback() + return err + } + } + + b.events = [][]byte{} + b.lastHead = nil + + return nil +} + +func (b *replBatch) Rollback() error { + b.wb.Rollback() + b.events = [][]byte{} + b.lastHead = nil + return nil +} + +func (l *Nodb) replicateEvent(b *replBatch, event []byte) error { + if len(event) == 0 { + return errInvalidBinLogEvent + } + + b.events = append(b.events, event) + + logType := uint8(event[0]) 
+ switch logType { + case BinLogTypePut: + return l.replicatePutEvent(b, event) + case BinLogTypeDeletion: + return l.replicateDeleteEvent(b, event) + default: + return errInvalidBinLogEvent + } +} + +func (l *Nodb) replicatePutEvent(b *replBatch, event []byte) error { + key, value, err := decodeBinLogPut(event) + if err != nil { + return err + } + + b.wb.Put(key, value) + + return nil +} + +func (l *Nodb) replicateDeleteEvent(b *replBatch, event []byte) error { + key, err := decodeBinLogDelete(event) + if err != nil { + return err + } + + b.wb.Delete(key) + + return nil +} + +func ReadEventFromReader(rb io.Reader, f func(head *BinLogHead, event []byte) error) error { + head := &BinLogHead{} + var err error + + for { + if err = head.Read(rb); err != nil { + if err == io.EOF { + break + } else { + return err + } + } + + var dataBuf bytes.Buffer + + if _, err = io.CopyN(&dataBuf, rb, int64(head.PayloadLen)); err != nil { + return err + } + + err = f(head, dataBuf.Bytes()) + if err != nil && err != ErrSkipEvent { + return err + } + } + + return nil +} + +func (l *Nodb) ReplicateFromReader(rb io.Reader) error { + b := new(replBatch) + + b.wb = l.ldb.NewWriteBatch() + b.l = l + + f := func(head *BinLogHead, event []byte) error { + if b.lastHead == nil { + b.lastHead = head + } else if !b.lastHead.InSameBatch(head) { + if err := b.Commit(); err != nil { + log.Fatal("replication error %s, skip to next", err.Error()) + return ErrSkipEvent + } + b.lastHead = head + } + + err := l.replicateEvent(b, event) + if err != nil { + log.Fatal("replication error %s, skip to next", err.Error()) + return ErrSkipEvent + } + return nil + } + + err := ReadEventFromReader(rb, f) + if err != nil { + b.Rollback() + return err + } + return b.Commit() +} + +func (l *Nodb) ReplicateFromData(data []byte) error { + rb := bytes.NewReader(data) + + err := l.ReplicateFromReader(rb) + + return err +} + +func (l *Nodb) ReplicateFromBinLog(filePath string) error { + f, err := os.Open(filePath) + if err 
!= nil { + return err + } + + rb := bufio.NewReaderSize(f, 4096) + + err = l.ReplicateFromReader(rb) + + f.Close() + + return err +} + +// try to read events, if no events read, try to wait the new event singal until timeout seconds +func (l *Nodb) ReadEventsToTimeout(info *BinLogAnchor, w io.Writer, timeout int) (n int, err error) { + lastIndex := info.LogFileIndex + lastPos := info.LogPos + + n = 0 + if l.binlog == nil { + //binlog not supported + info.LogFileIndex = 0 + info.LogPos = 0 + return + } + + n, err = l.ReadEventsTo(info, w) + if err == nil && info.LogFileIndex == lastIndex && info.LogPos == lastPos { + //no events read + select { + case <-l.binlog.Wait(): + case <-time.After(time.Duration(timeout) * time.Second): + } + return l.ReadEventsTo(info, w) + } + return +} + +func (l *Nodb) ReadEventsTo(info *BinLogAnchor, w io.Writer) (n int, err error) { + n = 0 + if l.binlog == nil { + //binlog not supported + info.LogFileIndex = 0 + info.LogPos = 0 + return + } + + index := info.LogFileIndex + offset := info.LogPos + + filePath := l.binlog.FormatLogFilePath(index) + + var f *os.File + f, err = os.Open(filePath) + if os.IsNotExist(err) { + lastIndex := l.binlog.LogFileIndex() + + if index == lastIndex { + //no binlog at all + info.LogPos = 0 + } else { + //slave binlog info had lost + info.LogFileIndex = -1 + } + } + + if err != nil { + if os.IsNotExist(err) { + err = nil + } + return + } + + defer f.Close() + + var fileSize int64 + st, _ := f.Stat() + fileSize = st.Size() + + if fileSize == info.LogPos { + return + } + + if _, err = f.Seek(offset, os.SEEK_SET); err != nil { + //may be invliad seek offset + return + } + + var lastHead *BinLogHead = nil + + head := &BinLogHead{} + + batchNum := 0 + + for { + if err = head.Read(f); err != nil { + if err == io.EOF { + //we will try to use next binlog + if index < l.binlog.LogFileIndex() { + info.LogFileIndex += 1 + info.LogPos = 0 + } + err = nil + return + } else { + return + } + + } + + if lastHead == nil { 
+ lastHead = head + batchNum++ + } else if !lastHead.InSameBatch(head) { + lastHead = head + batchNum++ + if batchNum > maxReplBatchNum || n > maxReplLogSize { + return + } + } + + if err = head.Write(w); err != nil { + return + } + + if _, err = io.CopyN(w, f, int64(head.PayloadLen)); err != nil { + return + } + + n += (head.Len() + int(head.PayloadLen)) + info.LogPos = info.LogPos + int64(head.Len()) + int64(head.PayloadLen) + } + + return +} diff --git a/vendor/github.com/lunny/nodb/scan.go b/vendor/github.com/lunny/nodb/scan.go new file mode 100644 index 0000000000..e989db3fed --- /dev/null +++ b/vendor/github.com/lunny/nodb/scan.go @@ -0,0 +1,144 @@ +package nodb + +import ( + "bytes" + "errors" + "regexp" + + "github.com/lunny/nodb/store" +) + +var errDataType = errors.New("error data type") +var errMetaKey = errors.New("error meta key") + +// Seek search the prefix key +func (db *DB) Seek(key []byte) (*store.Iterator, error) { + return db.seek(KVType, key) +} + +func (db *DB) seek(dataType byte, key []byte) (*store.Iterator, error) { + var minKey []byte + var err error + + if len(key) > 0 { + if err = checkKeySize(key); err != nil { + return nil, err + } + if minKey, err = db.encodeMetaKey(dataType, key); err != nil { + return nil, err + } + + } else { + if minKey, err = db.encodeMinKey(dataType); err != nil { + return nil, err + } + } + + it := db.bucket.NewIterator() + it.Seek(minKey) + return it, nil +} + +func (db *DB) MaxKey() ([]byte, error) { + return db.encodeMaxKey(KVType) +} + +func (db *DB) Key(it *store.Iterator) ([]byte, error) { + return db.decodeMetaKey(KVType, it.Key()) +} + +func (db *DB) scan(dataType byte, key []byte, count int, inclusive bool, match string) ([][]byte, error) { + var minKey, maxKey []byte + var err error + var r *regexp.Regexp + + if len(match) > 0 { + if r, err = regexp.Compile(match); err != nil { + return nil, err + } + } + + if len(key) > 0 { + if err = checkKeySize(key); err != nil { + return nil, err + } + if minKey, 
err = db.encodeMetaKey(dataType, key); err != nil { + return nil, err + } + + } else { + if minKey, err = db.encodeMinKey(dataType); err != nil { + return nil, err + } + } + + if maxKey, err = db.encodeMaxKey(dataType); err != nil { + return nil, err + } + + if count <= 0 { + count = defaultScanCount + } + + v := make([][]byte, 0, count) + + it := db.bucket.NewIterator() + it.Seek(minKey) + + if !inclusive { + if it.Valid() && bytes.Equal(it.RawKey(), minKey) { + it.Next() + } + } + + for i := 0; it.Valid() && i < count && bytes.Compare(it.RawKey(), maxKey) < 0; it.Next() { + if k, err := db.decodeMetaKey(dataType, it.Key()); err != nil { + continue + } else if r != nil && !r.Match(k) { + continue + } else { + v = append(v, k) + i++ + } + } + it.Close() + return v, nil +} + +func (db *DB) encodeMinKey(dataType byte) ([]byte, error) { + return db.encodeMetaKey(dataType, nil) +} + +func (db *DB) encodeMaxKey(dataType byte) ([]byte, error) { + k, err := db.encodeMetaKey(dataType, nil) + if err != nil { + return nil, err + } + k[len(k)-1] = dataType + 1 + return k, nil +} + +func (db *DB) encodeMetaKey(dataType byte, key []byte) ([]byte, error) { + switch dataType { + case KVType: + return db.encodeKVKey(key), nil + case LMetaType: + return db.lEncodeMetaKey(key), nil + case HSizeType: + return db.hEncodeSizeKey(key), nil + case ZSizeType: + return db.zEncodeSizeKey(key), nil + case BitMetaType: + return db.bEncodeMetaKey(key), nil + case SSizeType: + return db.sEncodeSizeKey(key), nil + default: + return nil, errDataType + } +} +func (db *DB) decodeMetaKey(dataType byte, ek []byte) ([]byte, error) { + if len(ek) < 2 || ek[0] != db.index || ek[1] != dataType { + return nil, errMetaKey + } + return ek[2:], nil +} diff --git a/vendor/github.com/lunny/nodb/store/db.go b/vendor/github.com/lunny/nodb/store/db.go new file mode 100644 index 0000000000..00a8831a67 --- /dev/null +++ b/vendor/github.com/lunny/nodb/store/db.go @@ -0,0 +1,61 @@ +package store + +import ( + 
"github.com/lunny/nodb/store/driver" +) + +type DB struct { + driver.IDB +} + +func (db *DB) NewIterator() *Iterator { + it := new(Iterator) + it.it = db.IDB.NewIterator() + + return it +} + +func (db *DB) NewWriteBatch() WriteBatch { + return db.IDB.NewWriteBatch() +} + +func (db *DB) NewSnapshot() (*Snapshot, error) { + var err error + s := &Snapshot{} + if s.ISnapshot, err = db.IDB.NewSnapshot(); err != nil { + return nil, err + } + + return s, nil +} + +func (db *DB) RangeIterator(min []byte, max []byte, rangeType uint8) *RangeLimitIterator { + return NewRangeLimitIterator(db.NewIterator(), &Range{min, max, rangeType}, &Limit{0, -1}) +} + +func (db *DB) RevRangeIterator(min []byte, max []byte, rangeType uint8) *RangeLimitIterator { + return NewRevRangeLimitIterator(db.NewIterator(), &Range{min, max, rangeType}, &Limit{0, -1}) +} + +//count < 0, unlimit. +// +//offset must >= 0, if < 0, will get nothing. +func (db *DB) RangeLimitIterator(min []byte, max []byte, rangeType uint8, offset int, count int) *RangeLimitIterator { + return NewRangeLimitIterator(db.NewIterator(), &Range{min, max, rangeType}, &Limit{offset, count}) +} + +//count < 0, unlimit. +// +//offset must >= 0, if < 0, will get nothing. 
+func (db *DB) RevRangeLimitIterator(min []byte, max []byte, rangeType uint8, offset int, count int) *RangeLimitIterator { + return NewRevRangeLimitIterator(db.NewIterator(), &Range{min, max, rangeType}, &Limit{offset, count}) +} + +func (db *DB) Begin() (*Tx, error) { + tx, err := db.IDB.Begin() + if err != nil { + return nil, err + } + + return &Tx{tx}, nil +} diff --git a/vendor/github.com/lunny/nodb/store/driver/batch.go b/vendor/github.com/lunny/nodb/store/driver/batch.go new file mode 100644 index 0000000000..6b79c21c48 --- /dev/null +++ b/vendor/github.com/lunny/nodb/store/driver/batch.go @@ -0,0 +1,39 @@ +package driver + +type BatchPuter interface { + BatchPut([]Write) error +} + +type Write struct { + Key []byte + Value []byte +} + +type WriteBatch struct { + batch BatchPuter + wb []Write +} + +func (w *WriteBatch) Put(key, value []byte) { + if value == nil { + value = []byte{} + } + w.wb = append(w.wb, Write{key, value}) +} + +func (w *WriteBatch) Delete(key []byte) { + w.wb = append(w.wb, Write{key, nil}) +} + +func (w *WriteBatch) Commit() error { + return w.batch.BatchPut(w.wb) +} + +func (w *WriteBatch) Rollback() error { + w.wb = w.wb[0:0] + return nil +} + +func NewWriteBatch(puter BatchPuter) IWriteBatch { + return &WriteBatch{puter, []Write{}} +} diff --git a/vendor/github.com/lunny/nodb/store/driver/driver.go b/vendor/github.com/lunny/nodb/store/driver/driver.go new file mode 100644 index 0000000000..6da67df083 --- /dev/null +++ b/vendor/github.com/lunny/nodb/store/driver/driver.go @@ -0,0 +1,67 @@ +package driver + +import ( + "errors" +) + +var ( + ErrTxSupport = errors.New("transaction is not supported") +) + +type IDB interface { + Close() error + + Get(key []byte) ([]byte, error) + + Put(key []byte, value []byte) error + Delete(key []byte) error + + NewIterator() IIterator + + NewWriteBatch() IWriteBatch + + NewSnapshot() (ISnapshot, error) + + Begin() (Tx, error) +} + +type ISnapshot interface { + Get(key []byte) ([]byte, error) + 
NewIterator() IIterator + Close() +} + +type IIterator interface { + Close() error + + First() + Last() + Seek(key []byte) + + Next() + Prev() + + Valid() bool + + Key() []byte + Value() []byte +} + +type IWriteBatch interface { + Put(key []byte, value []byte) + Delete(key []byte) + Commit() error + Rollback() error +} + +type Tx interface { + Get(key []byte) ([]byte, error) + Put(key []byte, value []byte) error + Delete(key []byte) error + + NewIterator() IIterator + NewWriteBatch() IWriteBatch + + Commit() error + Rollback() error +} diff --git a/vendor/github.com/lunny/nodb/store/driver/store.go b/vendor/github.com/lunny/nodb/store/driver/store.go new file mode 100644 index 0000000000..173431d4c1 --- /dev/null +++ b/vendor/github.com/lunny/nodb/store/driver/store.go @@ -0,0 +1,46 @@ +package driver + +import ( + "fmt" + + "github.com/lunny/nodb/config" +) + +type Store interface { + String() string + Open(path string, cfg *config.Config) (IDB, error) + Repair(path string, cfg *config.Config) error +} + +var dbs = map[string]Store{} + +func Register(s Store) { + name := s.String() + if _, ok := dbs[name]; ok { + panic(fmt.Errorf("store %s is registered", s)) + } + + dbs[name] = s +} + +func ListStores() []string { + s := []string{} + for k, _ := range dbs { + s = append(s, k) + } + + return s +} + +func GetStore(cfg *config.Config) (Store, error) { + if len(cfg.DBName) == 0 { + cfg.DBName = config.DefaultDBName + } + + s, ok := dbs[cfg.DBName] + if !ok { + return nil, fmt.Errorf("store %s is not registered", cfg.DBName) + } + + return s, nil +} diff --git a/vendor/github.com/lunny/nodb/store/goleveldb/batch.go b/vendor/github.com/lunny/nodb/store/goleveldb/batch.go new file mode 100644 index 0000000000..b17e85e750 --- /dev/null +++ b/vendor/github.com/lunny/nodb/store/goleveldb/batch.go @@ -0,0 +1,27 @@ +package goleveldb + +import ( + "github.com/syndtr/goleveldb/leveldb" +) + +type WriteBatch struct { + db *DB + wbatch *leveldb.Batch +} + +func (w *WriteBatch) 
Put(key, value []byte) { + w.wbatch.Put(key, value) +} + +func (w *WriteBatch) Delete(key []byte) { + w.wbatch.Delete(key) +} + +func (w *WriteBatch) Commit() error { + return w.db.db.Write(w.wbatch, nil) +} + +func (w *WriteBatch) Rollback() error { + w.wbatch.Reset() + return nil +} diff --git a/vendor/github.com/lunny/nodb/store/goleveldb/const.go b/vendor/github.com/lunny/nodb/store/goleveldb/const.go new file mode 100644 index 0000000000..2fffa7c82b --- /dev/null +++ b/vendor/github.com/lunny/nodb/store/goleveldb/const.go @@ -0,0 +1,4 @@ +package goleveldb + +const DBName = "goleveldb" +const MemDBName = "memory" diff --git a/vendor/github.com/lunny/nodb/store/goleveldb/db.go b/vendor/github.com/lunny/nodb/store/goleveldb/db.go new file mode 100644 index 0000000000..a36e87f628 --- /dev/null +++ b/vendor/github.com/lunny/nodb/store/goleveldb/db.go @@ -0,0 +1,187 @@ +package goleveldb + +import ( + "github.com/syndtr/goleveldb/leveldb" + "github.com/syndtr/goleveldb/leveldb/cache" + "github.com/syndtr/goleveldb/leveldb/filter" + "github.com/syndtr/goleveldb/leveldb/opt" + "github.com/syndtr/goleveldb/leveldb/storage" + + "github.com/lunny/nodb/config" + "github.com/lunny/nodb/store/driver" + + "os" +) + +const defaultFilterBits int = 10 + +type Store struct { +} + +func (s Store) String() string { + return DBName +} + +type MemStore struct { +} + +func (s MemStore) String() string { + return MemDBName +} + +type DB struct { + path string + + cfg *config.LevelDBConfig + + db *leveldb.DB + + opts *opt.Options + + iteratorOpts *opt.ReadOptions + + cache cache.Cache + + filter filter.Filter +} + +func (s Store) Open(path string, cfg *config.Config) (driver.IDB, error) { + if err := os.MkdirAll(path, os.ModePerm); err != nil { + return nil, err + } + + db := new(DB) + db.path = path + db.cfg = &cfg.LevelDB + + db.initOpts() + + var err error + db.db, err = leveldb.OpenFile(db.path, db.opts) + + if err != nil { + return nil, err + } + + return db, nil +} + +func (s 
Store) Repair(path string, cfg *config.Config) error { + db, err := leveldb.RecoverFile(path, newOptions(&cfg.LevelDB)) + if err != nil { + return err + } + + db.Close() + return nil +} + +func (s MemStore) Open(path string, cfg *config.Config) (driver.IDB, error) { + db := new(DB) + db.path = path + db.cfg = &cfg.LevelDB + + db.initOpts() + + var err error + db.db, err = leveldb.Open(storage.NewMemStorage(), db.opts) + if err != nil { + return nil, err + } + + return db, nil +} + +func (s MemStore) Repair(path string, cfg *config.Config) error { + return nil +} + +func (db *DB) initOpts() { + db.opts = newOptions(db.cfg) + + db.iteratorOpts = &opt.ReadOptions{} + db.iteratorOpts.DontFillCache = true +} + +func newOptions(cfg *config.LevelDBConfig) *opt.Options { + opts := &opt.Options{} + opts.ErrorIfMissing = false + + cfg.Adjust() + + //opts.BlockCacher = cache.NewLRU(cfg.CacheSize) + opts.BlockCacheCapacity = cfg.CacheSize + + //we must use bloomfilter + opts.Filter = filter.NewBloomFilter(defaultFilterBits) + + if !cfg.Compression { + opts.Compression = opt.NoCompression + } else { + opts.Compression = opt.SnappyCompression + } + + opts.BlockSize = cfg.BlockSize + opts.WriteBuffer = cfg.WriteBufferSize + + return opts +} + +func (db *DB) Close() error { + return db.db.Close() +} + +func (db *DB) Put(key, value []byte) error { + return db.db.Put(key, value, nil) +} + +func (db *DB) Get(key []byte) ([]byte, error) { + v, err := db.db.Get(key, nil) + if err == leveldb.ErrNotFound { + return nil, nil + } + return v, nil +} + +func (db *DB) Delete(key []byte) error { + return db.db.Delete(key, nil) +} + +func (db *DB) NewWriteBatch() driver.IWriteBatch { + wb := &WriteBatch{ + db: db, + wbatch: new(leveldb.Batch), + } + return wb +} + +func (db *DB) NewIterator() driver.IIterator { + it := &Iterator{ + db.db.NewIterator(nil, db.iteratorOpts), + } + + return it +} + +func (db *DB) Begin() (driver.Tx, error) { + return nil, driver.ErrTxSupport +} + +func (db *DB) 
NewSnapshot() (driver.ISnapshot, error) { + snapshot, err := db.db.GetSnapshot() + if err != nil { + return nil, err + } + + s := &Snapshot{ + db: db, + snp: snapshot, + } + + return s, nil +} + +func init() { + driver.Register(Store{}) + driver.Register(MemStore{}) +} diff --git a/vendor/github.com/lunny/nodb/store/goleveldb/iterator.go b/vendor/github.com/lunny/nodb/store/goleveldb/iterator.go new file mode 100644 index 0000000000..c1fd8b5573 --- /dev/null +++ b/vendor/github.com/lunny/nodb/store/goleveldb/iterator.go @@ -0,0 +1,49 @@ +package goleveldb + +import ( + "github.com/syndtr/goleveldb/leveldb/iterator" +) + +type Iterator struct { + it iterator.Iterator +} + +func (it *Iterator) Key() []byte { + return it.it.Key() +} + +func (it *Iterator) Value() []byte { + return it.it.Value() +} + +func (it *Iterator) Close() error { + if it.it != nil { + it.it.Release() + it.it = nil + } + return nil +} + +func (it *Iterator) Valid() bool { + return it.it.Valid() +} + +func (it *Iterator) Next() { + it.it.Next() +} + +func (it *Iterator) Prev() { + it.it.Prev() +} + +func (it *Iterator) First() { + it.it.First() +} + +func (it *Iterator) Last() { + it.it.Last() +} + +func (it *Iterator) Seek(key []byte) { + it.it.Seek(key) +} diff --git a/vendor/github.com/lunny/nodb/store/goleveldb/snapshot.go b/vendor/github.com/lunny/nodb/store/goleveldb/snapshot.go new file mode 100644 index 0000000000..fe2b409c3f --- /dev/null +++ b/vendor/github.com/lunny/nodb/store/goleveldb/snapshot.go @@ -0,0 +1,26 @@ +package goleveldb + +import ( + "github.com/lunny/nodb/store/driver" + "github.com/syndtr/goleveldb/leveldb" +) + +type Snapshot struct { + db *DB + snp *leveldb.Snapshot +} + +func (s *Snapshot) Get(key []byte) ([]byte, error) { + return s.snp.Get(key, s.db.iteratorOpts) +} + +func (s *Snapshot) NewIterator() driver.IIterator { + it := &Iterator{ + s.snp.NewIterator(nil, s.db.iteratorOpts), + } + return it +} + +func (s *Snapshot) Close() { + s.snp.Release() +} diff --git 
a/vendor/github.com/lunny/nodb/store/iterator.go b/vendor/github.com/lunny/nodb/store/iterator.go new file mode 100644 index 0000000000..27bf689da2 --- /dev/null +++ b/vendor/github.com/lunny/nodb/store/iterator.go @@ -0,0 +1,327 @@ +package store + +import ( + "bytes" + + "github.com/lunny/nodb/store/driver" +) + +const ( + IteratorForward uint8 = 0 + IteratorBackward uint8 = 1 +) + +const ( + RangeClose uint8 = 0x00 + RangeLOpen uint8 = 0x01 + RangeROpen uint8 = 0x10 + RangeOpen uint8 = 0x11 +) + +// min must less or equal than max +// +// range type: +// +// close: [min, max] +// open: (min, max) +// lopen: (min, max] +// ropen: [min, max) +// +type Range struct { + Min []byte + Max []byte + + Type uint8 +} + +type Limit struct { + Offset int + Count int +} + +type Iterator struct { + it driver.IIterator +} + +// Returns a copy of key. +func (it *Iterator) Key() []byte { + k := it.it.Key() + if k == nil { + return nil + } + + return append([]byte{}, k...) +} + +// Returns a copy of value. +func (it *Iterator) Value() []byte { + v := it.it.Value() + if v == nil { + return nil + } + + return append([]byte{}, v...) +} + +// Returns a reference of key. +// you must be careful that it will be changed after next iterate. +func (it *Iterator) RawKey() []byte { + return it.it.Key() +} + +// Returns a reference of value. +// you must be careful that it will be changed after next iterate. +func (it *Iterator) RawValue() []byte { + return it.it.Value() +} + +// Copy key to b, if b len is small or nil, returns a new one. +func (it *Iterator) BufKey(b []byte) []byte { + k := it.RawKey() + if k == nil { + return nil + } + if b == nil { + b = []byte{} + } + + b = b[0:0] + return append(b, k...) +} + +// Copy value to b, if b len is small or nil, returns a new one. +func (it *Iterator) BufValue(b []byte) []byte { + v := it.RawValue() + if v == nil { + return nil + } + + if b == nil { + b = []byte{} + } + + b = b[0:0] + return append(b, v...) 
+} + +func (it *Iterator) Close() { + if it.it != nil { + it.it.Close() + it.it = nil + } +} + +func (it *Iterator) Valid() bool { + return it.it.Valid() +} + +func (it *Iterator) Next() { + it.it.Next() +} + +func (it *Iterator) Prev() { + it.it.Prev() +} + +func (it *Iterator) SeekToFirst() { + it.it.First() +} + +func (it *Iterator) SeekToLast() { + it.it.Last() +} + +func (it *Iterator) Seek(key []byte) { + it.it.Seek(key) +} + +// Finds by key, if not found, nil returns. +func (it *Iterator) Find(key []byte) []byte { + it.Seek(key) + if it.Valid() { + k := it.RawKey() + if k == nil { + return nil + } else if bytes.Equal(k, key) { + return it.Value() + } + } + + return nil +} + +// Finds by key, if not found, nil returns, else a reference of value returns. +// you must be careful that it will be changed after next iterate. +func (it *Iterator) RawFind(key []byte) []byte { + it.Seek(key) + if it.Valid() { + k := it.RawKey() + if k == nil { + return nil + } else if bytes.Equal(k, key) { + return it.RawValue() + } + } + + return nil +} + +type RangeLimitIterator struct { + it *Iterator + + r *Range + l *Limit + + step int + + //0 for IteratorForward, 1 for IteratorBackward + direction uint8 +} + +func (it *RangeLimitIterator) Key() []byte { + return it.it.Key() +} + +func (it *RangeLimitIterator) Value() []byte { + return it.it.Value() +} + +func (it *RangeLimitIterator) RawKey() []byte { + return it.it.RawKey() +} + +func (it *RangeLimitIterator) RawValue() []byte { + return it.it.RawValue() +} + +func (it *RangeLimitIterator) BufKey(b []byte) []byte { + return it.it.BufKey(b) +} + +func (it *RangeLimitIterator) BufValue(b []byte) []byte { + return it.it.BufValue(b) +} + +func (it *RangeLimitIterator) Valid() bool { + if it.l.Offset < 0 { + return false + } else if !it.it.Valid() { + return false + } else if it.l.Count >= 0 && it.step >= it.l.Count { + return false + } + + if it.direction == IteratorForward { + if it.r.Max != nil { + r := 
bytes.Compare(it.it.RawKey(), it.r.Max) + if it.r.Type&RangeROpen > 0 { + return !(r >= 0) + } else { + return !(r > 0) + } + } + } else { + if it.r.Min != nil { + r := bytes.Compare(it.it.RawKey(), it.r.Min) + if it.r.Type&RangeLOpen > 0 { + return !(r <= 0) + } else { + return !(r < 0) + } + } + } + + return true +} + +func (it *RangeLimitIterator) Next() { + it.step++ + + if it.direction == IteratorForward { + it.it.Next() + } else { + it.it.Prev() + } +} + +func (it *RangeLimitIterator) Close() { + it.it.Close() +} + +func NewRangeLimitIterator(i *Iterator, r *Range, l *Limit) *RangeLimitIterator { + return rangeLimitIterator(i, r, l, IteratorForward) +} + +func NewRevRangeLimitIterator(i *Iterator, r *Range, l *Limit) *RangeLimitIterator { + return rangeLimitIterator(i, r, l, IteratorBackward) +} + +func NewRangeIterator(i *Iterator, r *Range) *RangeLimitIterator { + return rangeLimitIterator(i, r, &Limit{0, -1}, IteratorForward) +} + +func NewRevRangeIterator(i *Iterator, r *Range) *RangeLimitIterator { + return rangeLimitIterator(i, r, &Limit{0, -1}, IteratorBackward) +} + +func rangeLimitIterator(i *Iterator, r *Range, l *Limit, direction uint8) *RangeLimitIterator { + it := new(RangeLimitIterator) + + it.it = i + + it.r = r + it.l = l + it.direction = direction + + it.step = 0 + + if l.Offset < 0 { + return it + } + + if direction == IteratorForward { + if r.Min == nil { + it.it.SeekToFirst() + } else { + it.it.Seek(r.Min) + + if r.Type&RangeLOpen > 0 { + if it.it.Valid() && bytes.Equal(it.it.RawKey(), r.Min) { + it.it.Next() + } + } + } + } else { + if r.Max == nil { + it.it.SeekToLast() + } else { + it.it.Seek(r.Max) + + if !it.it.Valid() { + it.it.SeekToLast() + } else { + if !bytes.Equal(it.it.RawKey(), r.Max) { + it.it.Prev() + } + } + + if r.Type&RangeROpen > 0 { + if it.it.Valid() && bytes.Equal(it.it.RawKey(), r.Max) { + it.it.Prev() + } + } + } + } + + for i := 0; i < l.Offset; i++ { + if it.it.Valid() { + if it.direction == IteratorForward { + 
it.it.Next() + } else { + it.it.Prev() + } + } + } + + return it +} diff --git a/vendor/github.com/lunny/nodb/store/snapshot.go b/vendor/github.com/lunny/nodb/store/snapshot.go new file mode 100644 index 0000000000..75ba0497db --- /dev/null +++ b/vendor/github.com/lunny/nodb/store/snapshot.go @@ -0,0 +1,16 @@ +package store + +import ( + "github.com/lunny/nodb/store/driver" +) + +type Snapshot struct { + driver.ISnapshot +} + +func (s *Snapshot) NewIterator() *Iterator { + it := new(Iterator) + it.it = s.ISnapshot.NewIterator() + + return it +} diff --git a/vendor/github.com/lunny/nodb/store/store.go b/vendor/github.com/lunny/nodb/store/store.go new file mode 100644 index 0000000000..5d0ade1bf0 --- /dev/null +++ b/vendor/github.com/lunny/nodb/store/store.go @@ -0,0 +1,51 @@ +package store + +import ( + "fmt" + "os" + "path" + "github.com/lunny/nodb/config" + "github.com/lunny/nodb/store/driver" + + _ "github.com/lunny/nodb/store/goleveldb" +) + +func getStorePath(cfg *config.Config) string { + return path.Join(cfg.DataDir, fmt.Sprintf("%s_data", cfg.DBName)) +} + +func Open(cfg *config.Config) (*DB, error) { + s, err := driver.GetStore(cfg) + if err != nil { + return nil, err + } + + path := getStorePath(cfg) + + if err := os.MkdirAll(path, os.ModePerm); err != nil { + return nil, err + } + + idb, err := s.Open(path, cfg) + if err != nil { + return nil, err + } + + db := &DB{idb} + + return db, nil +} + +func Repair(cfg *config.Config) error { + s, err := driver.GetStore(cfg) + if err != nil { + return err + } + + path := getStorePath(cfg) + + return s.Repair(path, cfg) +} + +func init() { +} diff --git a/vendor/github.com/lunny/nodb/store/tx.go b/vendor/github.com/lunny/nodb/store/tx.go new file mode 100644 index 0000000000..32bcbcda4b --- /dev/null +++ b/vendor/github.com/lunny/nodb/store/tx.go @@ -0,0 +1,42 @@ +package store + +import ( + "github.com/lunny/nodb/store/driver" +) + +type Tx struct { + driver.Tx +} + +func (tx *Tx) NewIterator() *Iterator { + it := 
new(Iterator) + it.it = tx.Tx.NewIterator() + + return it +} + +func (tx *Tx) NewWriteBatch() WriteBatch { + return tx.Tx.NewWriteBatch() +} + +func (tx *Tx) RangeIterator(min []byte, max []byte, rangeType uint8) *RangeLimitIterator { + return NewRangeLimitIterator(tx.NewIterator(), &Range{min, max, rangeType}, &Limit{0, -1}) +} + +func (tx *Tx) RevRangeIterator(min []byte, max []byte, rangeType uint8) *RangeLimitIterator { + return NewRevRangeLimitIterator(tx.NewIterator(), &Range{min, max, rangeType}, &Limit{0, -1}) +} + +//count < 0, unlimit. +// +//offset must >= 0, if < 0, will get nothing. +func (tx *Tx) RangeLimitIterator(min []byte, max []byte, rangeType uint8, offset int, count int) *RangeLimitIterator { + return NewRangeLimitIterator(tx.NewIterator(), &Range{min, max, rangeType}, &Limit{offset, count}) +} + +//count < 0, unlimit. +// +//offset must >= 0, if < 0, will get nothing. +func (tx *Tx) RevRangeLimitIterator(min []byte, max []byte, rangeType uint8, offset int, count int) *RangeLimitIterator { + return NewRevRangeLimitIterator(tx.NewIterator(), &Range{min, max, rangeType}, &Limit{offset, count}) +} diff --git a/vendor/github.com/lunny/nodb/store/writebatch.go b/vendor/github.com/lunny/nodb/store/writebatch.go new file mode 100644 index 0000000000..23e079eba6 --- /dev/null +++ b/vendor/github.com/lunny/nodb/store/writebatch.go @@ -0,0 +1,9 @@ +package store + +import ( + "github.com/lunny/nodb/store/driver" +) + +type WriteBatch interface { + driver.IWriteBatch +} diff --git a/vendor/github.com/lunny/nodb/t_bit.go b/vendor/github.com/lunny/nodb/t_bit.go new file mode 100644 index 0000000000..930d4ba568 --- /dev/null +++ b/vendor/github.com/lunny/nodb/t_bit.go @@ -0,0 +1,922 @@ +package nodb + +import ( + "encoding/binary" + "errors" + "sort" + "time" + + "github.com/lunny/nodb/store" +) + +const ( + OPand uint8 = iota + 1 + OPor + OPxor + OPnot +) + +type BitPair struct { + Pos int32 + Val uint8 +} + +type segBitInfo struct { + Seq uint32 + Off 
uint32 + Val uint8 +} + +type segBitInfoArray []segBitInfo + +const ( + // byte + segByteWidth uint32 = 9 + segByteSize uint32 = 1 << segByteWidth + + // bit + segBitWidth uint32 = segByteWidth + 3 + segBitSize uint32 = segByteSize << 3 + + maxByteSize uint32 = 8 << 20 + maxSegCount uint32 = maxByteSize / segByteSize + + minSeq uint32 = 0 + maxSeq uint32 = uint32((maxByteSize << 3) - 1) +) + +var bitsInByte = [256]int32{0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, + 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, + 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, + 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, + 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, + 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, + 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, + 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, + 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, + 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, + 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8} + +var fillBits = [...]uint8{1, 3, 7, 15, 31, 63, 127, 255} + +var emptySegment []byte = make([]byte, segByteSize, segByteSize) + +var fillSegment []byte = func() []byte { + data := make([]byte, segByteSize, segByteSize) + for i := uint32(0); i < segByteSize; i++ { + data[i] = 0xff + } + return data +}() + +var errBinKey = errors.New("invalid bin key") +var errOffset = errors.New("invalid offset") +var errDuplicatePos = errors.New("duplicate bit pos") + +func getBit(sz []byte, offset uint32) uint8 { + index := offset >> 3 + if index >= uint32(len(sz)) { + return 0 // error("overflow") + } + + offset -= index << 3 + return sz[index] >> offset & 1 +} + +func setBit(sz []byte, offset uint32, val uint8) bool { + if val != 1 && val != 0 { + return false // error("invalid val") + 
} + + index := offset >> 3 + if index >= uint32(len(sz)) { + return false // error("overflow") + } + + offset -= index << 3 + if sz[index]>>offset&1 != val { + sz[index] ^= (1 << offset) + } + return true +} + +func (datas segBitInfoArray) Len() int { + return len(datas) +} + +func (datas segBitInfoArray) Less(i, j int) bool { + res := (datas)[i].Seq < (datas)[j].Seq + if !res && (datas)[i].Seq == (datas)[j].Seq { + res = (datas)[i].Off < (datas)[j].Off + } + return res +} + +func (datas segBitInfoArray) Swap(i, j int) { + datas[i], datas[j] = datas[j], datas[i] +} + +func (db *DB) bEncodeMetaKey(key []byte) []byte { + mk := make([]byte, len(key)+2) + mk[0] = db.index + mk[1] = BitMetaType + + copy(mk[2:], key) + return mk +} + +func (db *DB) bDecodeMetaKey(bkey []byte) ([]byte, error) { + if len(bkey) < 2 || bkey[0] != db.index || bkey[1] != BitMetaType { + return nil, errBinKey + } + + return bkey[2:], nil +} + +func (db *DB) bEncodeBinKey(key []byte, seq uint32) []byte { + bk := make([]byte, len(key)+8) + + pos := 0 + bk[pos] = db.index + pos++ + bk[pos] = BitType + pos++ + + binary.BigEndian.PutUint16(bk[pos:], uint16(len(key))) + pos += 2 + + copy(bk[pos:], key) + pos += len(key) + + binary.BigEndian.PutUint32(bk[pos:], seq) + + return bk +} + +func (db *DB) bDecodeBinKey(bkey []byte) (key []byte, seq uint32, err error) { + if len(bkey) < 8 || bkey[0] != db.index { + err = errBinKey + return + } + + keyLen := binary.BigEndian.Uint16(bkey[2:4]) + if int(keyLen+8) != len(bkey) { + err = errBinKey + return + } + + key = bkey[4 : 4+keyLen] + seq = uint32(binary.BigEndian.Uint32(bkey[4+keyLen:])) + return +} + +func (db *DB) bCapByteSize(seq uint32, off uint32) uint32 { + var offByteSize uint32 = (off >> 3) + 1 + if offByteSize > segByteSize { + offByteSize = segByteSize + } + + return seq<<segByteWidth + offByteSize +} + +func (db *DB) bParseOffset(key []byte, offset int32) (seq uint32, off uint32, err error) { + if offset < 0 { + if tailSeq, tailOff, e := 
db.bGetMeta(key); e != nil { + err = e + return + } else if tailSeq >= 0 { + offset += int32((uint32(tailSeq)<<segBitWidth | uint32(tailOff)) + 1) + if offset < 0 { + err = errOffset + return + } + } + } + + off = uint32(offset) + + seq = off >> segBitWidth + off &= (segBitSize - 1) + return +} + +func (db *DB) bGetMeta(key []byte) (tailSeq int32, tailOff int32, err error) { + var v []byte + + mk := db.bEncodeMetaKey(key) + v, err = db.bucket.Get(mk) + if err != nil { + return + } + + if v != nil { + tailSeq = int32(binary.LittleEndian.Uint32(v[0:4])) + tailOff = int32(binary.LittleEndian.Uint32(v[4:8])) + } else { + tailSeq = -1 + tailOff = -1 + } + return +} + +func (db *DB) bSetMeta(t *batch, key []byte, tailSeq uint32, tailOff uint32) { + ek := db.bEncodeMetaKey(key) + + buf := make([]byte, 8) + binary.LittleEndian.PutUint32(buf[0:4], tailSeq) + binary.LittleEndian.PutUint32(buf[4:8], tailOff) + + t.Put(ek, buf) + return +} + +func (db *DB) bUpdateMeta(t *batch, key []byte, seq uint32, off uint32) (tailSeq uint32, tailOff uint32, err error) { + var tseq, toff int32 + var update bool = false + + if tseq, toff, err = db.bGetMeta(key); err != nil { + return + } else if tseq < 0 { + update = true + } else { + tailSeq = uint32(MaxInt32(tseq, 0)) + tailOff = uint32(MaxInt32(toff, 0)) + update = (seq > tailSeq || (seq == tailSeq && off > tailOff)) + } + + if update { + db.bSetMeta(t, key, seq, off) + tailSeq = seq + tailOff = off + } + return +} + +func (db *DB) bDelete(t *batch, key []byte) (drop int64) { + mk := db.bEncodeMetaKey(key) + t.Delete(mk) + + minKey := db.bEncodeBinKey(key, minSeq) + maxKey := db.bEncodeBinKey(key, maxSeq) + it := db.bucket.RangeIterator(minKey, maxKey, store.RangeClose) + for ; it.Valid(); it.Next() { + t.Delete(it.RawKey()) + drop++ + } + it.Close() + + return drop +} + +func (db *DB) bGetSegment(key []byte, seq uint32) ([]byte, []byte, error) { + bk := db.bEncodeBinKey(key, seq) + segment, err := db.bucket.Get(bk) + if err != nil { + 
return bk, nil, err + } + return bk, segment, nil +} + +func (db *DB) bAllocateSegment(key []byte, seq uint32) ([]byte, []byte, error) { + bk, segment, err := db.bGetSegment(key, seq) + if err == nil && segment == nil { + segment = make([]byte, segByteSize, segByteSize) + } + return bk, segment, err +} + +func (db *DB) bIterator(key []byte) *store.RangeLimitIterator { + sk := db.bEncodeBinKey(key, minSeq) + ek := db.bEncodeBinKey(key, maxSeq) + return db.bucket.RangeIterator(sk, ek, store.RangeClose) +} + +func (db *DB) bSegAnd(a []byte, b []byte, res *[]byte) { + if a == nil || b == nil { + *res = nil + return + } + + data := *res + if data == nil { + data = make([]byte, segByteSize, segByteSize) + *res = data + } + + for i := uint32(0); i < segByteSize; i++ { + data[i] = a[i] & b[i] + } + return +} + +func (db *DB) bSegOr(a []byte, b []byte, res *[]byte) { + if a == nil || b == nil { + if a == nil && b == nil { + *res = nil + } else if a == nil { + *res = b + } else { + *res = a + } + return + } + + data := *res + if data == nil { + data = make([]byte, segByteSize, segByteSize) + *res = data + } + + for i := uint32(0); i < segByteSize; i++ { + data[i] = a[i] | b[i] + } + return +} + +func (db *DB) bSegXor(a []byte, b []byte, res *[]byte) { + if a == nil && b == nil { + *res = fillSegment + return + } + + if a == nil { + a = emptySegment + } + + if b == nil { + b = emptySegment + } + + data := *res + if data == nil { + data = make([]byte, segByteSize, segByteSize) + *res = data + } + + for i := uint32(0); i < segByteSize; i++ { + data[i] = a[i] ^ b[i] + } + + return +} + +func (db *DB) bExpireAt(key []byte, when int64) (int64, error) { + t := db.binBatch + t.Lock() + defer t.Unlock() + + if seq, _, err := db.bGetMeta(key); err != nil || seq < 0 { + return 0, err + } else { + db.expireAt(t, BitType, key, when) + if err := t.Commit(); err != nil { + return 0, err + } + } + return 1, nil +} + +func (db *DB) bCountByte(val byte, soff uint32, eoff uint32) int32 { + if 
soff > eoff { + soff, eoff = eoff, soff + } + + mask := uint8(0) + if soff > 0 { + mask |= fillBits[soff-1] + } + if eoff < 7 { + mask |= (fillBits[7] ^ fillBits[eoff]) + } + mask = fillBits[7] ^ mask + + return bitsInByte[val&mask] +} + +func (db *DB) bCountSeg(key []byte, seq uint32, soff uint32, eoff uint32) (cnt int32, err error) { + if soff >= segBitSize || soff < 0 || + eoff >= segBitSize || eoff < 0 { + return + } + + var segment []byte + if _, segment, err = db.bGetSegment(key, seq); err != nil { + return + } + + if segment == nil { + return + } + + if soff > eoff { + soff, eoff = eoff, soff + } + + headIdx := int(soff >> 3) + endIdx := int(eoff >> 3) + sByteOff := soff - ((soff >> 3) << 3) + eByteOff := eoff - ((eoff >> 3) << 3) + + if headIdx == endIdx { + cnt = db.bCountByte(segment[headIdx], sByteOff, eByteOff) + } else { + cnt = db.bCountByte(segment[headIdx], sByteOff, 7) + + db.bCountByte(segment[endIdx], 0, eByteOff) + } + + // sum up following bytes + for idx, end := headIdx+1, endIdx-1; idx <= end; idx += 1 { + cnt += bitsInByte[segment[idx]] + if idx == end { + break + } + } + + return +} + +func (db *DB) BGet(key []byte) (data []byte, err error) { + if err = checkKeySize(key); err != nil { + return + } + + var ts, to int32 + if ts, to, err = db.bGetMeta(key); err != nil || ts < 0 { + return + } + + var tailSeq, tailOff = uint32(ts), uint32(to) + var capByteSize uint32 = db.bCapByteSize(tailSeq, tailOff) + data = make([]byte, capByteSize, capByteSize) + + minKey := db.bEncodeBinKey(key, minSeq) + maxKey := db.bEncodeBinKey(key, tailSeq) + it := db.bucket.RangeIterator(minKey, maxKey, store.RangeClose) + + var seq, s, e uint32 + for ; it.Valid(); it.Next() { + if _, seq, err = db.bDecodeBinKey(it.RawKey()); err != nil { + data = nil + break + } + + s = seq << segByteWidth + e = MinUInt32(s+segByteSize, capByteSize) + copy(data[s:e], it.RawValue()) + } + it.Close() + + return +} + +func (db *DB) BDelete(key []byte) (drop int64, err error) { + if 
err = checkKeySize(key); err != nil { + return + } + + t := db.binBatch + t.Lock() + defer t.Unlock() + + drop = db.bDelete(t, key) + db.rmExpire(t, BitType, key) + + err = t.Commit() + return +} + +func (db *DB) BSetBit(key []byte, offset int32, val uint8) (ori uint8, err error) { + if err = checkKeySize(key); err != nil { + return + } + + // todo : check offset + var seq, off uint32 + if seq, off, err = db.bParseOffset(key, offset); err != nil { + return 0, err + } + + var bk, segment []byte + if bk, segment, err = db.bAllocateSegment(key, seq); err != nil { + return 0, err + } + + if segment != nil { + ori = getBit(segment, off) + if setBit(segment, off, val) { + t := db.binBatch + t.Lock() + defer t.Unlock() + + t.Put(bk, segment) + if _, _, e := db.bUpdateMeta(t, key, seq, off); e != nil { + err = e + return + } + + err = t.Commit() + } + } + + return +} + +func (db *DB) BMSetBit(key []byte, args ...BitPair) (place int64, err error) { + if err = checkKeySize(key); err != nil { + return + } + + // (ps : so as to aviod wasting memory copy while calling db.Get() and batch.Put(), + // here we sequence the params by pos, so that we can merge the execution of + // diff pos setting which targets on the same segment respectively. 
) + + // #1 : sequence request data + var argCnt = len(args) + var bitInfos segBitInfoArray = make(segBitInfoArray, argCnt) + var seq, off uint32 + + for i, info := range args { + if seq, off, err = db.bParseOffset(key, info.Pos); err != nil { + return + } + + bitInfos[i].Seq = seq + bitInfos[i].Off = off + bitInfos[i].Val = info.Val + } + + sort.Sort(bitInfos) + + for i := 1; i < argCnt; i++ { + if bitInfos[i].Seq == bitInfos[i-1].Seq && bitInfos[i].Off == bitInfos[i-1].Off { + return 0, errDuplicatePos + } + } + + // #2 : execute bit set in order + t := db.binBatch + t.Lock() + defer t.Unlock() + + var curBinKey, curSeg []byte + var curSeq, maxSeq, maxOff uint32 + + for _, info := range bitInfos { + if curSeg != nil && info.Seq != curSeq { + t.Put(curBinKey, curSeg) + curSeg = nil + } + + if curSeg == nil { + curSeq = info.Seq + if curBinKey, curSeg, err = db.bAllocateSegment(key, info.Seq); err != nil { + return + } + + if curSeg == nil { + continue + } + } + + if setBit(curSeg, info.Off, info.Val) { + maxSeq = info.Seq + maxOff = info.Off + place++ + } + } + + if curSeg != nil { + t.Put(curBinKey, curSeg) + } + + // finally, update meta + if place > 0 { + if _, _, err = db.bUpdateMeta(t, key, maxSeq, maxOff); err != nil { + return + } + + err = t.Commit() + } + + return +} + +func (db *DB) BGetBit(key []byte, offset int32) (uint8, error) { + if seq, off, err := db.bParseOffset(key, offset); err != nil { + return 0, err + } else { + _, segment, err := db.bGetSegment(key, seq) + if err != nil { + return 0, err + } + + if segment == nil { + return 0, nil + } else { + return getBit(segment, off), nil + } + } +} + +// func (db *DB) BGetRange(key []byte, start int32, end int32) ([]byte, error) { +// section := make([]byte) + +// return +// } + +func (db *DB) BCount(key []byte, start int32, end int32) (cnt int32, err error) { + var sseq, soff uint32 + if sseq, soff, err = db.bParseOffset(key, start); err != nil { + return + } + + var eseq, eoff uint32 + if eseq, eoff, 
err = db.bParseOffset(key, end); err != nil { + return + } + + if sseq > eseq || (sseq == eseq && soff > eoff) { + sseq, eseq = eseq, sseq + soff, eoff = eoff, soff + } + + var segCnt int32 + if eseq == sseq { + if segCnt, err = db.bCountSeg(key, sseq, soff, eoff); err != nil { + return 0, err + } + + cnt = segCnt + + } else { + if segCnt, err = db.bCountSeg(key, sseq, soff, segBitSize-1); err != nil { + return 0, err + } else { + cnt += segCnt + } + + if segCnt, err = db.bCountSeg(key, eseq, 0, eoff); err != nil { + return 0, err + } else { + cnt += segCnt + } + } + + // middle segs + var segment []byte + skey := db.bEncodeBinKey(key, sseq) + ekey := db.bEncodeBinKey(key, eseq) + + it := db.bucket.RangeIterator(skey, ekey, store.RangeOpen) + for ; it.Valid(); it.Next() { + segment = it.RawValue() + for _, bt := range segment { + cnt += bitsInByte[bt] + } + } + it.Close() + + return +} + +func (db *DB) BTail(key []byte) (int32, error) { + // effective length of data, the highest bit-pos set in history + tailSeq, tailOff, err := db.bGetMeta(key) + if err != nil { + return 0, err + } + + tail := int32(-1) + if tailSeq >= 0 { + tail = int32(uint32(tailSeq)<<segBitWidth | uint32(tailOff)) + } + + return tail, nil +} + +func (db *DB) BOperation(op uint8, dstkey []byte, srckeys ...[]byte) (blen int32, err error) { + // blen - + // the total bit size of data stored in destination key, + // that is equal to the size of the longest input string. 
+ + var exeOp func([]byte, []byte, *[]byte) + switch op { + case OPand: + exeOp = db.bSegAnd + case OPor: + exeOp = db.bSegOr + case OPxor, OPnot: + exeOp = db.bSegXor + default: + return + } + + if dstkey == nil || srckeys == nil { + return + } + + t := db.binBatch + t.Lock() + defer t.Unlock() + + var srcKseq, srcKoff int32 + var seq, off, maxDstSeq, maxDstOff uint32 + + var keyNum int = len(srckeys) + var validKeyNum int + for i := 0; i < keyNum; i++ { + if srcKseq, srcKoff, err = db.bGetMeta(srckeys[i]); err != nil { + return + } else if srcKseq < 0 { + srckeys[i] = nil + continue + } + + validKeyNum++ + + seq = uint32(srcKseq) + off = uint32(srcKoff) + if seq > maxDstSeq || (seq == maxDstSeq && off > maxDstOff) { + maxDstSeq = seq + maxDstOff = off + } + } + + if (op == OPnot && validKeyNum != 1) || + (op != OPnot && validKeyNum < 2) { + return // with not enough existing source key + } + + var srcIdx int + for srcIdx = 0; srcIdx < keyNum; srcIdx++ { + if srckeys[srcIdx] != nil { + break + } + } + + // init - data + var segments = make([][]byte, maxDstSeq+1) + + if op == OPnot { + // ps : + // ( ~num == num ^ 0x11111111 ) + // we init the result segments with all bit set, + // then we can calculate through the way of 'xor'. + + // ahead segments bin format : 1111 ... 1111 + for i := uint32(0); i < maxDstSeq; i++ { + segments[i] = fillSegment + } + + // last segment bin format : 1111..1100..0000 + var tailSeg = make([]byte, segByteSize, segByteSize) + var fillByte = fillBits[7] + var tailSegLen = db.bCapByteSize(uint32(0), maxDstOff) + for i := uint32(0); i < tailSegLen-1; i++ { + tailSeg[i] = fillByte + } + tailSeg[tailSegLen-1] = fillBits[maxDstOff-(tailSegLen-1)<<3] + segments[maxDstSeq] = tailSeg + + } else { + // ps : init segments by data corresponding to the 1st valid source key + it := db.bIterator(srckeys[srcIdx]) + for ; it.Valid(); it.Next() { + if _, seq, err = db.bDecodeBinKey(it.RawKey()); err != nil { + // to do ... 
+ it.Close() + return + } + segments[seq] = it.Value() + } + it.Close() + srcIdx++ + } + + // operation with following keys + var res []byte + for i := srcIdx; i < keyNum; i++ { + if srckeys[i] == nil { + continue + } + + it := db.bIterator(srckeys[i]) + for idx, end := uint32(0), false; !end; it.Next() { + end = !it.Valid() + if !end { + if _, seq, err = db.bDecodeBinKey(it.RawKey()); err != nil { + // to do ... + it.Close() + return + } + } else { + seq = maxDstSeq + 1 + } + + // todo : + // operation 'and' can be optimize here : + // if seq > max_segments_idx, this loop can be break, + // which can avoid cost from Key() and bDecodeBinKey() + + for ; idx < seq; idx++ { + res = nil + exeOp(segments[idx], nil, &res) + segments[idx] = res + } + + if !end { + res = it.Value() + exeOp(segments[seq], res, &res) + segments[seq] = res + idx++ + } + } + it.Close() + } + + // clear the old data in case + db.bDelete(t, dstkey) + db.rmExpire(t, BitType, dstkey) + + // set data + db.bSetMeta(t, dstkey, maxDstSeq, maxDstOff) + + var bk []byte + for seq, segt := range segments { + if segt != nil { + bk = db.bEncodeBinKey(dstkey, uint32(seq)) + t.Put(bk, segt) + } + } + + err = t.Commit() + if err == nil { + // blen = int32(db.bCapByteSize(maxDstOff, maxDstOff)) + blen = int32(maxDstSeq<<segBitWidth | maxDstOff + 1) + } + + return +} + +func (db *DB) BExpire(key []byte, duration int64) (int64, error) { + if duration <= 0 { + return 0, errExpireValue + } + + if err := checkKeySize(key); err != nil { + return -1, err + } + + return db.bExpireAt(key, time.Now().Unix()+duration) +} + +func (db *DB) BExpireAt(key []byte, when int64) (int64, error) { + if when <= time.Now().Unix() { + return 0, errExpireValue + } + + if err := checkKeySize(key); err != nil { + return -1, err + } + + return db.bExpireAt(key, when) +} + +func (db *DB) BTTL(key []byte) (int64, error) { + if err := checkKeySize(key); err != nil { + return -1, err + } + + return db.ttl(BitType, key) +} + +func (db *DB) 
BPersist(key []byte) (int64, error) { + if err := checkKeySize(key); err != nil { + return 0, err + } + + t := db.binBatch + t.Lock() + defer t.Unlock() + + n, err := db.rmExpire(t, BitType, key) + if err != nil { + return 0, err + } + + err = t.Commit() + return n, err +} + +func (db *DB) BScan(key []byte, count int, inclusive bool, match string) ([][]byte, error) { + return db.scan(BitMetaType, key, count, inclusive, match) +} + +func (db *DB) bFlush() (drop int64, err error) { + t := db.binBatch + t.Lock() + defer t.Unlock() + + return db.flushType(t, BitType) +} diff --git a/vendor/github.com/lunny/nodb/t_hash.go b/vendor/github.com/lunny/nodb/t_hash.go new file mode 100644 index 0000000000..bedfbf7c3e --- /dev/null +++ b/vendor/github.com/lunny/nodb/t_hash.go @@ -0,0 +1,509 @@ +package nodb + +import ( + "encoding/binary" + "errors" + "time" + + "github.com/lunny/nodb/store" +) + +type FVPair struct { + Field []byte + Value []byte +} + +var errHashKey = errors.New("invalid hash key") +var errHSizeKey = errors.New("invalid hsize key") + +const ( + hashStartSep byte = ':' + hashStopSep byte = hashStartSep + 1 +) + +func checkHashKFSize(key []byte, field []byte) error { + if len(key) > MaxKeySize || len(key) == 0 { + return errKeySize + } else if len(field) > MaxHashFieldSize || len(field) == 0 { + return errHashFieldSize + } + return nil +} + +func (db *DB) hEncodeSizeKey(key []byte) []byte { + buf := make([]byte, len(key)+2) + + buf[0] = db.index + buf[1] = HSizeType + + copy(buf[2:], key) + return buf +} + +func (db *DB) hDecodeSizeKey(ek []byte) ([]byte, error) { + if len(ek) < 2 || ek[0] != db.index || ek[1] != HSizeType { + return nil, errHSizeKey + } + + return ek[2:], nil +} + +func (db *DB) hEncodeHashKey(key []byte, field []byte) []byte { + buf := make([]byte, len(key)+len(field)+1+1+2+1) + + pos := 0 + buf[pos] = db.index + pos++ + buf[pos] = HashType + pos++ + + binary.BigEndian.PutUint16(buf[pos:], uint16(len(key))) + pos += 2 + + copy(buf[pos:], 
key) + pos += len(key) + + buf[pos] = hashStartSep + pos++ + copy(buf[pos:], field) + + return buf +} + +// hDecodeHashKey splits an encoded hash key into its key and field parts. +// It returns errHashKey when the index/type header, the length prefix or +// the separator byte does not match. The returned slices are sub-slices of ek. +func (db *DB) hDecodeHashKey(ek []byte) ([]byte, []byte, error) { + if len(ek) < 5 || ek[0] != db.index || ek[1] != HashType { + return nil, nil, errHashKey + } + + pos := 2 + keyLen := int(binary.BigEndian.Uint16(ek[pos:])) + pos += 2 + + if keyLen+5 > len(ek) { + return nil, nil, errHashKey + } + + key := ek[pos : pos+keyLen] + pos += keyLen + + if ek[pos] != hashStartSep { + return nil, nil, errHashKey + } + + pos++ + field := ek[pos:] + return key, field, nil +} + +// hEncodeStartKey returns the encoded hash key of key with an empty field. +func (db *DB) hEncodeStartKey(key []byte) []byte { + return db.hEncodeHashKey(key, nil) +} + +// hEncodeStopKey returns the encoded hash key of key with an empty field, +// with its last byte (the separator) replaced by hashStopSep. +func (db *DB) hEncodeStopKey(key []byte) []byte { + k := db.hEncodeHashKey(key, nil) + + k[len(k)-1] = hashStopSep + + return k +} + +// hSetItem queues a put of value under the encoded (key, field) on db.hashBatch. +// It returns 1 when the field was not stored yet (the size counter is bumped +// through hIncrSize) and 0 when an existing field is overwritten. +// It does not commit the batch. +func (db *DB) hSetItem(key []byte, field []byte, value []byte) (int64, error) { + t := db.hashBatch + + ek := db.hEncodeHashKey(key, field) + + var n int64 = 1 + // a failed lookup must not be taken for a missing field, + // otherwise the size counter is bumped for a field that may already exist + if v, err := db.bucket.Get(ek); err != nil { + return 0, err + } else if v != nil { + n = 0 + } else if _, err := db.hIncrSize(key, 1); err != nil { + return 0, err + } + + t.Put(ek, value) + return n, nil +} + +// ps : here just focus on deleting the hash data, +// any other likes expire is ignore. 
+func (db *DB) hDelete(t *batch, key []byte) int64 { + sk := db.hEncodeSizeKey(key) + start := db.hEncodeStartKey(key) + stop := db.hEncodeStopKey(key) + + var num int64 = 0 + it := db.bucket.RangeLimitIterator(start, stop, store.RangeROpen, 0, -1) + for ; it.Valid(); it.Next() { + t.Delete(it.Key()) + num++ + } + it.Close() + + t.Delete(sk) + return num +} + +func (db *DB) hExpireAt(key []byte, when int64) (int64, error) { + t := db.hashBatch + t.Lock() + defer t.Unlock() + + if hlen, err := db.HLen(key); err != nil || hlen == 0 { + return 0, err + } else { + db.expireAt(t, HashType, key, when) + if err := t.Commit(); err != nil { + return 0, err + } + } + return 1, nil +} + +func (db *DB) HLen(key []byte) (int64, error) { + if err := checkKeySize(key); err != nil { + return 0, err + } + + return Int64(db.bucket.Get(db.hEncodeSizeKey(key))) +} + +func (db *DB) HSet(key []byte, field []byte, value []byte) (int64, error) { + if err := checkHashKFSize(key, field); err != nil { + return 0, err + } else if err := checkValueSize(value); err != nil { + return 0, err + } + + t := db.hashBatch + t.Lock() + defer t.Unlock() + + n, err := db.hSetItem(key, field, value) + if err != nil { + return 0, err + } + + //todo add binlog + + err = t.Commit() + return n, err +} + +func (db *DB) HGet(key []byte, field []byte) ([]byte, error) { + if err := checkHashKFSize(key, field); err != nil { + return nil, err + } + + return db.bucket.Get(db.hEncodeHashKey(key, field)) +} + +func (db *DB) HMset(key []byte, args ...FVPair) error { + t := db.hashBatch + t.Lock() + defer t.Unlock() + + var err error + var ek []byte + var num int64 = 0 + for i := 0; i < len(args); i++ { + if err := checkHashKFSize(key, args[i].Field); err != nil { + return err + } else if err := checkValueSize(args[i].Value); err != nil { + return err + } + + ek = db.hEncodeHashKey(key, args[i].Field) + + if v, err := db.bucket.Get(ek); err != nil { + return err + } else if v == nil { + num++ + } + + t.Put(ek, 
args[i].Value) + } + + if _, err = db.hIncrSize(key, num); err != nil { + return err + } + + //todo add binglog + err = t.Commit() + return err +} + +func (db *DB) HMget(key []byte, args ...[]byte) ([][]byte, error) { + var ek []byte + + it := db.bucket.NewIterator() + defer it.Close() + + r := make([][]byte, len(args)) + for i := 0; i < len(args); i++ { + if err := checkHashKFSize(key, args[i]); err != nil { + return nil, err + } + + ek = db.hEncodeHashKey(key, args[i]) + + r[i] = it.Find(ek) + } + + return r, nil +} + +func (db *DB) HDel(key []byte, args ...[]byte) (int64, error) { + t := db.hashBatch + + var ek []byte + var v []byte + var err error + + t.Lock() + defer t.Unlock() + + it := db.bucket.NewIterator() + defer it.Close() + + var num int64 = 0 + for i := 0; i < len(args); i++ { + if err := checkHashKFSize(key, args[i]); err != nil { + return 0, err + } + + ek = db.hEncodeHashKey(key, args[i]) + + v = it.RawFind(ek) + if v == nil { + continue + } else { + num++ + t.Delete(ek) + } + } + + if _, err = db.hIncrSize(key, -num); err != nil { + return 0, err + } + + err = t.Commit() + + return num, err +} + +func (db *DB) hIncrSize(key []byte, delta int64) (int64, error) { + t := db.hashBatch + sk := db.hEncodeSizeKey(key) + + var err error + var size int64 = 0 + if size, err = Int64(db.bucket.Get(sk)); err != nil { + return 0, err + } else { + size += delta + if size <= 0 { + size = 0 + t.Delete(sk) + db.rmExpire(t, HashType, key) + } else { + t.Put(sk, PutInt64(size)) + } + } + + return size, nil +} + +func (db *DB) HIncrBy(key []byte, field []byte, delta int64) (int64, error) { + if err := checkHashKFSize(key, field); err != nil { + return 0, err + } + + t := db.hashBatch + var ek []byte + var err error + + t.Lock() + defer t.Unlock() + + ek = db.hEncodeHashKey(key, field) + + var n int64 = 0 + if n, err = StrInt64(db.bucket.Get(ek)); err != nil { + return 0, err + } + + n += delta + + _, err = db.hSetItem(key, field, StrPutInt64(n)) + if err != nil { + 
return 0, err + } + + err = t.Commit() + + return n, err +} + +// HGetAll returns every field/value pair stored under the hash key, +// in the order the range iterator yields them. +func (db *DB) HGetAll(key []byte) ([]FVPair, error) { + if err := checkKeySize(key); err != nil { + return nil, err + } + + start := db.hEncodeStartKey(key) + stop := db.hEncodeStopKey(key) + + v := make([]FVPair, 0, 16) + + it := db.bucket.RangeLimitIterator(start, stop, store.RangeROpen, 0, -1) + // deferred so the iterator is also closed on the decode-error return below + defer it.Close() + + for ; it.Valid(); it.Next() { + _, f, err := db.hDecodeHashKey(it.Key()) + if err != nil { + return nil, err + } + + v = append(v, FVPair{Field: f, Value: it.Value()}) + } + + return v, nil +} + +// HKeys returns the field names stored under the hash key, +// in the order the range iterator yields them. +func (db *DB) HKeys(key []byte) ([][]byte, error) { + if err := checkKeySize(key); err != nil { + return nil, err + } + + start := db.hEncodeStartKey(key) + stop := db.hEncodeStopKey(key) + + v := make([][]byte, 0, 16) + + it := db.bucket.RangeLimitIterator(start, stop, store.RangeROpen, 0, -1) + // deferred so the iterator is also closed on the decode-error return below + defer it.Close() + + for ; it.Valid(); it.Next() { + _, f, err := db.hDecodeHashKey(it.Key()) + if err != nil { + return nil, err + } + v = append(v, f) + } + + return v, nil +} + +// HValues returns the values stored under the hash key, +// in the order the range iterator yields them. +func (db *DB) HValues(key []byte) ([][]byte, error) { + if err := checkKeySize(key); err != nil { + return nil, err + } + + start := db.hEncodeStartKey(key) + stop := db.hEncodeStopKey(key) + + v := make([][]byte, 0, 16) + + it := db.bucket.RangeLimitIterator(start, stop, store.RangeROpen, 0, -1) + // deferred so the iterator is also closed on the decode-error return below + defer it.Close() + + for ; it.Valid(); it.Next() { + _, _, err := db.hDecodeHashKey(it.Key()) + if err != nil { + return nil, err + } + + v = append(v, it.Value()) + } + + return v, nil +} + +// HClear queues the deletion of the hash data and of its expire entry, +// commits the batch and returns the count reported by hDelete. +func (db *DB) HClear(key []byte) (int64, error) { + if err := checkKeySize(key); err != nil { + return 0, err + } + + t := db.hashBatch + t.Lock() + defer t.Unlock() + + num := db.hDelete(t, key) + db.rmExpire(t, HashType, key) + + err := t.Commit() + return num, err +} + +func (db *DB) HMclear(keys ...[]byte) (int64, error) { + t := db.hashBatch + t.Lock() + defer t.Unlock() + + for _, key := range keys { + if err := checkKeySize(key); err != nil { + return 0, 
err + } + + db.hDelete(t, key) + db.rmExpire(t, HashType, key) + } + + err := t.Commit() + return int64(len(keys)), err +} + +func (db *DB) hFlush() (drop int64, err error) { + t := db.hashBatch + + t.Lock() + defer t.Unlock() + + return db.flushType(t, HashType) +} + +func (db *DB) HScan(key []byte, count int, inclusive bool, match string) ([][]byte, error) { + return db.scan(HSizeType, key, count, inclusive, match) +} + +func (db *DB) HExpire(key []byte, duration int64) (int64, error) { + if duration <= 0 { + return 0, errExpireValue + } + + return db.hExpireAt(key, time.Now().Unix()+duration) +} + +func (db *DB) HExpireAt(key []byte, when int64) (int64, error) { + if when <= time.Now().Unix() { + return 0, errExpireValue + } + + return db.hExpireAt(key, when) +} + +func (db *DB) HTTL(key []byte) (int64, error) { + if err := checkKeySize(key); err != nil { + return -1, err + } + + return db.ttl(HashType, key) +} + +func (db *DB) HPersist(key []byte) (int64, error) { + if err := checkKeySize(key); err != nil { + return 0, err + } + + t := db.hashBatch + t.Lock() + defer t.Unlock() + + n, err := db.rmExpire(t, HashType, key) + if err != nil { + return 0, err + } + + err = t.Commit() + return n, err +} diff --git a/vendor/github.com/lunny/nodb/t_kv.go b/vendor/github.com/lunny/nodb/t_kv.go new file mode 100644 index 0000000000..82a12f7027 --- /dev/null +++ b/vendor/github.com/lunny/nodb/t_kv.go @@ -0,0 +1,387 @@ +package nodb + +import ( + "errors" + "time" +) + +type KVPair struct { + Key []byte + Value []byte +} + +var errKVKey = errors.New("invalid encode kv key") + +func checkKeySize(key []byte) error { + if len(key) > MaxKeySize || len(key) == 0 { + return errKeySize + } + return nil +} + +func checkValueSize(value []byte) error { + if len(value) > MaxValueSize { + return errValueSize + } + + return nil +} + +func (db *DB) encodeKVKey(key []byte) []byte { + ek := make([]byte, len(key)+2) + ek[0] = db.index + ek[1] = KVType + copy(ek[2:], key) + return ek +} + 
+func (db *DB) decodeKVKey(ek []byte) ([]byte, error) { + if len(ek) < 2 || ek[0] != db.index || ek[1] != KVType { + return nil, errKVKey + } + + return ek[2:], nil +} + +func (db *DB) encodeKVMinKey() []byte { + ek := db.encodeKVKey(nil) + return ek +} + +func (db *DB) encodeKVMaxKey() []byte { + ek := db.encodeKVKey(nil) + ek[len(ek)-1] = KVType + 1 + return ek +} + +func (db *DB) incr(key []byte, delta int64) (int64, error) { + if err := checkKeySize(key); err != nil { + return 0, err + } + + var err error + key = db.encodeKVKey(key) + + t := db.kvBatch + + t.Lock() + defer t.Unlock() + + var n int64 + n, err = StrInt64(db.bucket.Get(key)) + if err != nil { + return 0, err + } + + n += delta + + t.Put(key, StrPutInt64(n)) + + //todo binlog + + err = t.Commit() + return n, err +} + +// ps : here just focus on deleting the key-value data, +// any other likes expire is ignore. +func (db *DB) delete(t *batch, key []byte) int64 { + key = db.encodeKVKey(key) + t.Delete(key) + return 1 +} + +func (db *DB) setExpireAt(key []byte, when int64) (int64, error) { + t := db.kvBatch + t.Lock() + defer t.Unlock() + + if exist, err := db.Exists(key); err != nil || exist == 0 { + return 0, err + } else { + db.expireAt(t, KVType, key, when) + if err := t.Commit(); err != nil { + return 0, err + } + } + return 1, nil +} + +func (db *DB) Decr(key []byte) (int64, error) { + return db.incr(key, -1) +} + +func (db *DB) DecrBy(key []byte, decrement int64) (int64, error) { + return db.incr(key, -decrement) +} + +func (db *DB) Del(keys ...[]byte) (int64, error) { + if len(keys) == 0 { + return 0, nil + } + + codedKeys := make([][]byte, len(keys)) + for i, k := range keys { + codedKeys[i] = db.encodeKVKey(k) + } + + t := db.kvBatch + t.Lock() + defer t.Unlock() + + for i, k := range keys { + t.Delete(codedKeys[i]) + db.rmExpire(t, KVType, k) + } + + err := t.Commit() + return int64(len(keys)), err +} + +func (db *DB) Exists(key []byte) (int64, error) { + if err := checkKeySize(key); err != 
nil { + return 0, err + } + + var err error + key = db.encodeKVKey(key) + + var v []byte + v, err = db.bucket.Get(key) + if v != nil && err == nil { + return 1, nil + } + + return 0, err +} + +func (db *DB) Get(key []byte) ([]byte, error) { + if err := checkKeySize(key); err != nil { + return nil, err + } + + key = db.encodeKVKey(key) + + return db.bucket.Get(key) +} + +func (db *DB) GetSet(key []byte, value []byte) ([]byte, error) { + if err := checkKeySize(key); err != nil { + return nil, err + } else if err := checkValueSize(value); err != nil { + return nil, err + } + + key = db.encodeKVKey(key) + + t := db.kvBatch + + t.Lock() + defer t.Unlock() + + oldValue, err := db.bucket.Get(key) + if err != nil { + return nil, err + } + + t.Put(key, value) + //todo, binlog + + err = t.Commit() + + return oldValue, err +} + +func (db *DB) Incr(key []byte) (int64, error) { + return db.incr(key, 1) +} + +func (db *DB) IncrBy(key []byte, increment int64) (int64, error) { + return db.incr(key, increment) +} + +func (db *DB) MGet(keys ...[]byte) ([][]byte, error) { + values := make([][]byte, len(keys)) + + it := db.bucket.NewIterator() + defer it.Close() + + for i := range keys { + if err := checkKeySize(keys[i]); err != nil { + return nil, err + } + + values[i] = it.Find(db.encodeKVKey(keys[i])) + } + + return values, nil +} + +func (db *DB) MSet(args ...KVPair) error { + if len(args) == 0 { + return nil + } + + t := db.kvBatch + + var err error + var key []byte + var value []byte + + t.Lock() + defer t.Unlock() + + for i := 0; i < len(args); i++ { + if err := checkKeySize(args[i].Key); err != nil { + return err + } else if err := checkValueSize(args[i].Value); err != nil { + return err + } + + key = db.encodeKVKey(args[i].Key) + + value = args[i].Value + + t.Put(key, value) + + //todo binlog + } + + err = t.Commit() + return err +} + +func (db *DB) Set(key []byte, value []byte) error { + if err := checkKeySize(key); err != nil { + return err + } else if err := 
checkValueSize(value); err != nil { + return err + } + + var err error + key = db.encodeKVKey(key) + + t := db.kvBatch + + t.Lock() + defer t.Unlock() + + t.Put(key, value) + + err = t.Commit() + + return err +} + +// SetNX stores value under key only when the key holds no value yet. +// It returns 1 when the value was written and committed, 0 when the key +// already had a value, and 0 with the error when the lookup or the commit fails. +func (db *DB) SetNX(key []byte, value []byte) (int64, error) { + if err := checkKeySize(key); err != nil { + return 0, err + } else if err := checkValueSize(value); err != nil { + return 0, err + } + + key = db.encodeKVKey(key) + + t := db.kvBatch + + t.Lock() + defer t.Unlock() + + // err is declared at function scope on purpose: an `if v, err := ...` form + // would shadow it and a later commit error would never reach the caller + v, err := db.bucket.Get(key) + if err != nil { + return 0, err + } + if v != nil { + return 0, nil + } + + t.Put(key, value) + + //todo binlog + + if err = t.Commit(); err != nil { + return 0, err + } + + return 1, nil +} + +// flush runs db.flushType for KVType while holding the kv batch lock. +func (db *DB) flush() (drop int64, err error) { + t := db.kvBatch + t.Lock() + defer t.Unlock() + return db.flushType(t, KVType) +} + +//if inclusive is true, scan range [key, inf) else (key, inf) +func (db *DB) Scan(key []byte, count int, inclusive bool, match string) ([][]byte, error) { + return db.scan(KVType, key, count, inclusive, match) +} + +// Expire sets the key to expire duration seconds from now. +// A non-positive duration is rejected with errExpireValue. +func (db *DB) Expire(key []byte, duration int64) (int64, error) { + if duration <= 0 { + return 0, errExpireValue + } + + return db.setExpireAt(key, time.Now().Unix()+duration) +} + +// ExpireAt sets the key to expire at the unix time when. +// A time that is not in the future is rejected with errExpireValue. +func (db *DB) ExpireAt(key []byte, when int64) (int64, error) { + if when <= time.Now().Unix() { + return 0, errExpireValue + } + + return db.setExpireAt(key, when) +} + +// TTL returns the result of db.ttl for the kv key, or -1 with the error +// when the key size is invalid. +func (db *DB) TTL(key []byte) (int64, error) { + if err := checkKeySize(key); err != nil { + return -1, err + } + + return db.ttl(KVType, key) +} + +// Persist removes the expire entry of the kv key and commits the batch. +// It returns the count reported by rmExpire. +func (db *DB) Persist(key []byte) (int64, error) { + if err := checkKeySize(key); err != nil { + return 0, err + } + + t := db.kvBatch + t.Lock() + defer t.Unlock() + n, err := db.rmExpire(t, KVType, key) + if err != nil { + return 0, err + } + + err = t.Commit() + return n, err +} + +// Lock acquires the kv batch lock. +func (db *DB) Lock() { + t := db.kvBatch + t.Lock() +} + +func (db *DB) Remove(key []byte) bool { + if len(key) == 0 { + return false 
+ } + t := db.kvBatch + t.Delete(db.encodeKVKey(key)) + _, err := db.rmExpire(t, KVType, key) + if err != nil { + return false + } + return true +} + +func (db *DB) Commit() error { + t := db.kvBatch + return t.Commit() +} + +func (db *DB) Unlock() { + t := db.kvBatch + t.Unlock() +} diff --git a/vendor/github.com/lunny/nodb/t_list.go b/vendor/github.com/lunny/nodb/t_list.go new file mode 100644 index 0000000000..5b9d9d9c21 --- /dev/null +++ b/vendor/github.com/lunny/nodb/t_list.go @@ -0,0 +1,492 @@ +package nodb + +import ( + "encoding/binary" + "errors" + "time" + + "github.com/lunny/nodb/store" +) + +const ( + listHeadSeq int32 = 1 + listTailSeq int32 = 2 + + listMinSeq int32 = 1000 + listMaxSeq int32 = 1<<31 - 1000 + listInitialSeq int32 = listMinSeq + (listMaxSeq-listMinSeq)/2 +) + +var errLMetaKey = errors.New("invalid lmeta key") +var errListKey = errors.New("invalid list key") +var errListSeq = errors.New("invalid list sequence, overflow") + +func (db *DB) lEncodeMetaKey(key []byte) []byte { + buf := make([]byte, len(key)+2) + buf[0] = db.index + buf[1] = LMetaType + + copy(buf[2:], key) + return buf +} + +func (db *DB) lDecodeMetaKey(ek []byte) ([]byte, error) { + if len(ek) < 2 || ek[0] != db.index || ek[1] != LMetaType { + return nil, errLMetaKey + } + + return ek[2:], nil +} + +func (db *DB) lEncodeListKey(key []byte, seq int32) []byte { + buf := make([]byte, len(key)+8) + + pos := 0 + buf[pos] = db.index + pos++ + buf[pos] = ListType + pos++ + + binary.BigEndian.PutUint16(buf[pos:], uint16(len(key))) + pos += 2 + + copy(buf[pos:], key) + pos += len(key) + + binary.BigEndian.PutUint32(buf[pos:], uint32(seq)) + + return buf +} + +func (db *DB) lDecodeListKey(ek []byte) (key []byte, seq int32, err error) { + if len(ek) < 8 || ek[0] != db.index || ek[1] != ListType { + err = errListKey + return + } + + keyLen := int(binary.BigEndian.Uint16(ek[2:])) + if keyLen+8 != len(ek) { + err = errListKey + return + } + + key = ek[4 : 4+keyLen] + seq = 
int32(binary.BigEndian.Uint32(ek[4+keyLen:])) + return +} + +// lpush pushes args onto the list stored under key and returns the new length. +// With whereSeq == listTailSeq the elements are appended after the tail with +// growing sequence numbers, otherwise they are put before the head with +// shrinking ones. With no args it only reports the current length. +// errListSeq is returned when the resulting sequence leaves (listMinSeq, listMaxSeq). +func (db *DB) lpush(key []byte, whereSeq int32, args ...[]byte) (int64, error) { + if err := checkKeySize(key); err != nil { + return 0, err + } + + var headSeq int32 + var tailSeq int32 + var size int32 + var err error + + t := db.listBatch + t.Lock() + defer t.Unlock() + + metaKey := db.lEncodeMetaKey(key) + headSeq, tailSeq, size, err = db.lGetMeta(nil, metaKey) + if err != nil { + return 0, err + } + + var pushCnt int = len(args) + if pushCnt == 0 { + return int64(size), nil + } + + var seq int32 = headSeq + var delta int32 = -1 + if whereSeq == listTailSeq { + seq = tailSeq + delta = 1 + } + + // append elements + if size > 0 { + seq += delta + } + + // check the final sequence before queuing any write, + // so a rejected push leaves the batch untouched + last := seq + int32(pushCnt-1)*delta + if last <= listMinSeq || last >= listMaxSeq { + return 0, errListSeq + } + + for i := 0; i < pushCnt; i++ { + ek := db.lEncodeListKey(key, seq+int32(i)*delta) + t.Put(ek, args[i]) + } + + seq = last + + // set meta info + if whereSeq == listHeadSeq { + headSeq = seq + } else { + tailSeq = seq + } + + db.lSetMeta(metaKey, headSeq, tailSeq) + + err = t.Commit() + return int64(size) + int64(pushCnt), err +} + +// lpop removes the element at the tail (whereSeq == listTailSeq) or at the +// head (otherwise) of the list stored under key and returns its value. +// When lSetMeta reports that the list became empty, the expire entry of +// the list is removed in the same batch. +func (db *DB) lpop(key []byte, whereSeq int32) ([]byte, error) { + if err := checkKeySize(key); err != nil { + return nil, err + } + + t := db.listBatch + t.Lock() + defer t.Unlock() + + var headSeq int32 + var tailSeq int32 + var err error + + metaKey := db.lEncodeMetaKey(key) + headSeq, tailSeq, _, err = db.lGetMeta(nil, metaKey) + if err != nil { + return nil, err + } + + var value []byte + + var seq int32 = headSeq + if whereSeq == listTailSeq { + seq = tailSeq + } + + itemKey := db.lEncodeListKey(key, seq) + value, err = db.bucket.Get(itemKey) + if err != nil { + return nil, err + } + + if whereSeq == listHeadSeq { + headSeq += 1 + } else { + tailSeq -= 1 + } + + t.Delete(itemKey) + size := db.lSetMeta(metaKey, headSeq, tailSeq) + if size == 0 { + // the expire entry of a list is stored under ListType (see lExpireAt), + // so it has to be removed under ListType as well, not HashType + db.rmExpire(t, ListType, key) + } + + err = t.Commit() + return value, err +} + +// 
ps : here just focus on deleting the list data, +// any other likes expire is ignore. +func (db *DB) lDelete(t *batch, key []byte) int64 { + mk := db.lEncodeMetaKey(key) + + var headSeq int32 + var tailSeq int32 + var err error + + it := db.bucket.NewIterator() + defer it.Close() + + headSeq, tailSeq, _, err = db.lGetMeta(it, mk) + if err != nil { + return 0 + } + + var num int64 = 0 + startKey := db.lEncodeListKey(key, headSeq) + stopKey := db.lEncodeListKey(key, tailSeq) + + rit := store.NewRangeIterator(it, &store.Range{startKey, stopKey, store.RangeClose}) + for ; rit.Valid(); rit.Next() { + t.Delete(rit.RawKey()) + num++ + } + + t.Delete(mk) + + return num +} + +func (db *DB) lGetMeta(it *store.Iterator, ek []byte) (headSeq int32, tailSeq int32, size int32, err error) { + var v []byte + if it != nil { + v = it.Find(ek) + } else { + v, err = db.bucket.Get(ek) + } + if err != nil { + return + } else if v == nil { + headSeq = listInitialSeq + tailSeq = listInitialSeq + size = 0 + return + } else { + headSeq = int32(binary.LittleEndian.Uint32(v[0:4])) + tailSeq = int32(binary.LittleEndian.Uint32(v[4:8])) + size = tailSeq - headSeq + 1 + } + return +} + +func (db *DB) lSetMeta(ek []byte, headSeq int32, tailSeq int32) int32 { + t := db.listBatch + + var size int32 = tailSeq - headSeq + 1 + if size < 0 { + // todo : log error + panic + } else if size == 0 { + t.Delete(ek) + } else { + buf := make([]byte, 8) + + binary.LittleEndian.PutUint32(buf[0:4], uint32(headSeq)) + binary.LittleEndian.PutUint32(buf[4:8], uint32(tailSeq)) + + t.Put(ek, buf) + } + + return size +} + +func (db *DB) lExpireAt(key []byte, when int64) (int64, error) { + t := db.listBatch + t.Lock() + defer t.Unlock() + + if llen, err := db.LLen(key); err != nil || llen == 0 { + return 0, err + } else { + db.expireAt(t, ListType, key, when) + if err := t.Commit(); err != nil { + return 0, err + } + } + return 1, nil +} + +func (db *DB) LIndex(key []byte, index int32) ([]byte, error) { + if err := 
checkKeySize(key); err != nil { + return nil, err + } + + var seq int32 + var headSeq int32 + var tailSeq int32 + var err error + + metaKey := db.lEncodeMetaKey(key) + + it := db.bucket.NewIterator() + defer it.Close() + + headSeq, tailSeq, _, err = db.lGetMeta(it, metaKey) + if err != nil { + return nil, err + } + + if index >= 0 { + seq = headSeq + index + } else { + seq = tailSeq + index + 1 + } + + sk := db.lEncodeListKey(key, seq) + v := it.Find(sk) + + return v, nil +} + +func (db *DB) LLen(key []byte) (int64, error) { + if err := checkKeySize(key); err != nil { + return 0, err + } + + ek := db.lEncodeMetaKey(key) + _, _, size, err := db.lGetMeta(nil, ek) + return int64(size), err +} + +func (db *DB) LPop(key []byte) ([]byte, error) { + return db.lpop(key, listHeadSeq) +} + +func (db *DB) LPush(key []byte, arg1 []byte, args ...[]byte) (int64, error) { + var argss = [][]byte{arg1} + argss = append(argss, args...) + return db.lpush(key, listHeadSeq, argss...) +} + +func (db *DB) LRange(key []byte, start int32, stop int32) ([][]byte, error) { + if err := checkKeySize(key); err != nil { + return nil, err + } + + var headSeq int32 + var llen int32 + var err error + + metaKey := db.lEncodeMetaKey(key) + + it := db.bucket.NewIterator() + defer it.Close() + + if headSeq, _, llen, err = db.lGetMeta(it, metaKey); err != nil { + return nil, err + } + + if start < 0 { + start = llen + start + } + if stop < 0 { + stop = llen + stop + } + if start < 0 { + start = 0 + } + + if start > stop || start >= llen { + return [][]byte{}, nil + } + + if stop >= llen { + stop = llen - 1 + } + + limit := (stop - start) + 1 + headSeq += start + + v := make([][]byte, 0, limit) + + startKey := db.lEncodeListKey(key, headSeq) + rit := store.NewRangeLimitIterator(it, + &store.Range{ + Min: startKey, + Max: nil, + Type: store.RangeClose}, + &store.Limit{ + Offset: 0, + Count: int(limit)}) + + for ; rit.Valid(); rit.Next() { + v = append(v, rit.Value()) + } + + return v, nil +} + +func (db 
*DB) RPop(key []byte) ([]byte, error) { + return db.lpop(key, listTailSeq) +} + +func (db *DB) RPush(key []byte, arg1 []byte, args ...[]byte) (int64, error) { + var argss = [][]byte{arg1} + argss = append(argss, args...) + return db.lpush(key, listTailSeq, argss...) +} + +func (db *DB) LClear(key []byte) (int64, error) { + if err := checkKeySize(key); err != nil { + return 0, err + } + + t := db.listBatch + t.Lock() + defer t.Unlock() + + num := db.lDelete(t, key) + db.rmExpire(t, ListType, key) + + err := t.Commit() + return num, err +} + +func (db *DB) LMclear(keys ...[]byte) (int64, error) { + t := db.listBatch + t.Lock() + defer t.Unlock() + + for _, key := range keys { + if err := checkKeySize(key); err != nil { + return 0, err + } + + db.lDelete(t, key) + db.rmExpire(t, ListType, key) + + } + + err := t.Commit() + return int64(len(keys)), err +} + +func (db *DB) lFlush() (drop int64, err error) { + t := db.listBatch + t.Lock() + defer t.Unlock() + return db.flushType(t, ListType) +} + +func (db *DB) LExpire(key []byte, duration int64) (int64, error) { + if duration <= 0 { + return 0, errExpireValue + } + + return db.lExpireAt(key, time.Now().Unix()+duration) +} + +func (db *DB) LExpireAt(key []byte, when int64) (int64, error) { + if when <= time.Now().Unix() { + return 0, errExpireValue + } + + return db.lExpireAt(key, when) +} + +func (db *DB) LTTL(key []byte) (int64, error) { + if err := checkKeySize(key); err != nil { + return -1, err + } + + return db.ttl(ListType, key) +} + +func (db *DB) LPersist(key []byte) (int64, error) { + if err := checkKeySize(key); err != nil { + return 0, err + } + + t := db.listBatch + t.Lock() + defer t.Unlock() + + n, err := db.rmExpire(t, ListType, key) + if err != nil { + return 0, err + } + + err = t.Commit() + return n, err +} + +func (db *DB) LScan(key []byte, count int, inclusive bool, match string) ([][]byte, error) { + return db.scan(LMetaType, key, count, inclusive, match) +} + +func (db *DB) lEncodeMinKey() []byte { 
+ return db.lEncodeMetaKey(nil) +} + +func (db *DB) lEncodeMaxKey() []byte { + ek := db.lEncodeMetaKey(nil) + ek[len(ek)-1] = LMetaType + 1 + return ek +} diff --git a/vendor/github.com/lunny/nodb/t_set.go b/vendor/github.com/lunny/nodb/t_set.go new file mode 100644 index 0000000000..41ce30e8ce --- /dev/null +++ b/vendor/github.com/lunny/nodb/t_set.go @@ -0,0 +1,601 @@ +package nodb + +import ( + "encoding/binary" + "errors" + "time" + + "github.com/lunny/nodb/store" +) + +var errSetKey = errors.New("invalid set key") +var errSSizeKey = errors.New("invalid ssize key") + +const ( + setStartSep byte = ':' + setStopSep byte = setStartSep + 1 + UnionType byte = 51 + DiffType byte = 52 + InterType byte = 53 +) + +func checkSetKMSize(key []byte, member []byte) error { + if len(key) > MaxKeySize || len(key) == 0 { + return errKeySize + } else if len(member) > MaxSetMemberSize || len(member) == 0 { + return errSetMemberSize + } + return nil +} + +func (db *DB) sEncodeSizeKey(key []byte) []byte { + buf := make([]byte, len(key)+2) + + buf[0] = db.index + buf[1] = SSizeType + + copy(buf[2:], key) + return buf +} + +func (db *DB) sDecodeSizeKey(ek []byte) ([]byte, error) { + if len(ek) < 2 || ek[0] != db.index || ek[1] != SSizeType { + return nil, errSSizeKey + } + + return ek[2:], nil +} + +func (db *DB) sEncodeSetKey(key []byte, member []byte) []byte { + buf := make([]byte, len(key)+len(member)+1+1+2+1) + + pos := 0 + buf[pos] = db.index + pos++ + buf[pos] = SetType + pos++ + + binary.BigEndian.PutUint16(buf[pos:], uint16(len(key))) + pos += 2 + + copy(buf[pos:], key) + pos += len(key) + + buf[pos] = setStartSep + pos++ + copy(buf[pos:], member) + + return buf +} + +func (db *DB) sDecodeSetKey(ek []byte) ([]byte, []byte, error) { + if len(ek) < 5 || ek[0] != db.index || ek[1] != SetType { + return nil, nil, errSetKey + } + + pos := 2 + keyLen := int(binary.BigEndian.Uint16(ek[pos:])) + pos += 2 + + if keyLen+5 > len(ek) { + return nil, nil, errSetKey + } + + key := ek[pos : 
pos+keyLen] + pos += keyLen + + if ek[pos] != hashStartSep { + return nil, nil, errSetKey + } + + pos++ + member := ek[pos:] + return key, member, nil +} + +func (db *DB) sEncodeStartKey(key []byte) []byte { + return db.sEncodeSetKey(key, nil) +} + +func (db *DB) sEncodeStopKey(key []byte) []byte { + k := db.sEncodeSetKey(key, nil) + + k[len(k)-1] = setStopSep + + return k +} + +func (db *DB) sFlush() (drop int64, err error) { + + t := db.setBatch + t.Lock() + defer t.Unlock() + + return db.flushType(t, SetType) +} + +func (db *DB) sDelete(t *batch, key []byte) int64 { + sk := db.sEncodeSizeKey(key) + start := db.sEncodeStartKey(key) + stop := db.sEncodeStopKey(key) + + var num int64 = 0 + it := db.bucket.RangeLimitIterator(start, stop, store.RangeROpen, 0, -1) + for ; it.Valid(); it.Next() { + t.Delete(it.RawKey()) + num++ + } + + it.Close() + + t.Delete(sk) + return num +} + +func (db *DB) sIncrSize(key []byte, delta int64) (int64, error) { + t := db.setBatch + sk := db.sEncodeSizeKey(key) + + var err error + var size int64 = 0 + if size, err = Int64(db.bucket.Get(sk)); err != nil { + return 0, err + } else { + size += delta + if size <= 0 { + size = 0 + t.Delete(sk) + db.rmExpire(t, SetType, key) + } else { + t.Put(sk, PutInt64(size)) + } + } + + return size, nil +} + +func (db *DB) sExpireAt(key []byte, when int64) (int64, error) { + t := db.setBatch + t.Lock() + defer t.Unlock() + + if scnt, err := db.SCard(key); err != nil || scnt == 0 { + return 0, err + } else { + db.expireAt(t, SetType, key, when) + if err := t.Commit(); err != nil { + return 0, err + } + + } + + return 1, nil +} + +func (db *DB) sSetItem(key []byte, member []byte) (int64, error) { + t := db.setBatch + ek := db.sEncodeSetKey(key, member) + + var n int64 = 1 + if v, _ := db.bucket.Get(ek); v != nil { + n = 0 + } else { + if _, err := db.sIncrSize(key, 1); err != nil { + return 0, err + } + } + + t.Put(ek, nil) + return n, nil +} + +func (db *DB) SAdd(key []byte, args ...[]byte) (int64, 
error) { + t := db.setBatch + t.Lock() + defer t.Unlock() + + var err error + var ek []byte + var num int64 = 0 + for i := 0; i < len(args); i++ { + if err := checkSetKMSize(key, args[i]); err != nil { + return 0, err + } + + ek = db.sEncodeSetKey(key, args[i]) + + if v, err := db.bucket.Get(ek); err != nil { + return 0, err + } else if v == nil { + num++ + } + + t.Put(ek, nil) + } + + if _, err = db.sIncrSize(key, num); err != nil { + return 0, err + } + + err = t.Commit() + return num, err + +} + +func (db *DB) SCard(key []byte) (int64, error) { + if err := checkKeySize(key); err != nil { + return 0, err + } + + sk := db.sEncodeSizeKey(key) + + return Int64(db.bucket.Get(sk)) +} + +func (db *DB) sDiffGeneric(keys ...[]byte) ([][]byte, error) { + destMap := make(map[string]bool) + + members, err := db.SMembers(keys[0]) + if err != nil { + return nil, err + } + + for _, m := range members { + destMap[String(m)] = true + } + + for _, k := range keys[1:] { + members, err := db.SMembers(k) + if err != nil { + return nil, err + } + + for _, m := range members { + if _, ok := destMap[String(m)]; !ok { + continue + } else if ok { + delete(destMap, String(m)) + } + } + // O - A = O, O is zero set. + if len(destMap) == 0 { + return nil, nil + } + } + + slice := make([][]byte, len(destMap)) + idx := 0 + for k, v := range destMap { + if !v { + continue + } + slice[idx] = []byte(k) + idx++ + } + + return slice, nil +} + +func (db *DB) SDiff(keys ...[]byte) ([][]byte, error) { + v, err := db.sDiffGeneric(keys...) + return v, err +} + +func (db *DB) SDiffStore(dstKey []byte, keys ...[]byte) (int64, error) { + n, err := db.sStoreGeneric(dstKey, DiffType, keys...) 
+ return n, err +} + +func (db *DB) sInterGeneric(keys ...[]byte) ([][]byte, error) { + destMap := make(map[string]bool) + + members, err := db.SMembers(keys[0]) + if err != nil { + return nil, err + } + + for _, m := range members { + destMap[String(m)] = true + } + + for _, key := range keys[1:] { + if err := checkKeySize(key); err != nil { + return nil, err + } + + members, err := db.SMembers(key) + if err != nil { + return nil, err + } else if len(members) == 0 { + return nil, err + } + + tempMap := make(map[string]bool) + for _, member := range members { + if err := checkKeySize(member); err != nil { + return nil, err + } + if _, ok := destMap[String(member)]; ok { + tempMap[String(member)] = true //mark this item as selected + } + } + destMap = tempMap //reduce the size of the result set + if len(destMap) == 0 { + return nil, nil + } + } + + slice := make([][]byte, len(destMap)) + idx := 0 + for k, v := range destMap { + if !v { + continue + } + + slice[idx] = []byte(k) + idx++ + } + + return slice, nil + +} + +func (db *DB) SInter(keys ...[]byte) ([][]byte, error) { + v, err := db.sInterGeneric(keys...) + return v, err + +} + +func (db *DB) SInterStore(dstKey []byte, keys ...[]byte) (int64, error) { + n, err := db.sStoreGeneric(dstKey, InterType, keys...) 
+ return n, err +} + +func (db *DB) SIsMember(key []byte, member []byte) (int64, error) { + ek := db.sEncodeSetKey(key, member) + + var n int64 = 1 + if v, err := db.bucket.Get(ek); err != nil { + return 0, err + } else if v == nil { + n = 0 + } + return n, nil +} + +func (db *DB) SMembers(key []byte) ([][]byte, error) { + if err := checkKeySize(key); err != nil { + return nil, err + } + + start := db.sEncodeStartKey(key) + stop := db.sEncodeStopKey(key) + + v := make([][]byte, 0, 16) + + it := db.bucket.RangeLimitIterator(start, stop, store.RangeROpen, 0, -1) + for ; it.Valid(); it.Next() { + _, m, err := db.sDecodeSetKey(it.Key()) + if err != nil { + return nil, err + } + + v = append(v, m) + } + + it.Close() + + return v, nil +} + +func (db *DB) SRem(key []byte, args ...[]byte) (int64, error) { + t := db.setBatch + t.Lock() + defer t.Unlock() + + var ek []byte + var v []byte + var err error + + it := db.bucket.NewIterator() + defer it.Close() + + var num int64 = 0 + for i := 0; i < len(args); i++ { + if err := checkSetKMSize(key, args[i]); err != nil { + return 0, err + } + + ek = db.sEncodeSetKey(key, args[i]) + + v = it.RawFind(ek) + if v == nil { + continue + } else { + num++ + t.Delete(ek) + } + } + + if _, err = db.sIncrSize(key, -num); err != nil { + return 0, err + } + + err = t.Commit() + return num, err + +} + +func (db *DB) sUnionGeneric(keys ...[]byte) ([][]byte, error) { + dstMap := make(map[string]bool) + + for _, key := range keys { + if err := checkKeySize(key); err != nil { + return nil, err + } + + members, err := db.SMembers(key) + if err != nil { + return nil, err + } + + for _, member := range members { + dstMap[String(member)] = true + } + } + + slice := make([][]byte, len(dstMap)) + idx := 0 + for k, v := range dstMap { + if !v { + continue + } + slice[idx] = []byte(k) + idx++ + } + + return slice, nil +} + +func (db *DB) SUnion(keys ...[]byte) ([][]byte, error) { + v, err := db.sUnionGeneric(keys...) 
+ return v, err +} + +func (db *DB) SUnionStore(dstKey []byte, keys ...[]byte) (int64, error) { + n, err := db.sStoreGeneric(dstKey, UnionType, keys...) + return n, err +} + +func (db *DB) sStoreGeneric(dstKey []byte, optType byte, keys ...[]byte) (int64, error) { + if err := checkKeySize(dstKey); err != nil { + return 0, err + } + + t := db.setBatch + t.Lock() + defer t.Unlock() + + db.sDelete(t, dstKey) + + var err error + var ek []byte + var v [][]byte + + switch optType { + case UnionType: + v, err = db.sUnionGeneric(keys...) + case DiffType: + v, err = db.sDiffGeneric(keys...) + case InterType: + v, err = db.sInterGeneric(keys...) + } + + if err != nil { + return 0, err + } + + for _, m := range v { + if err := checkSetKMSize(dstKey, m); err != nil { + return 0, err + } + + ek = db.sEncodeSetKey(dstKey, m) + + if _, err := db.bucket.Get(ek); err != nil { + return 0, err + } + + t.Put(ek, nil) + } + + var num = int64(len(v)) + sk := db.sEncodeSizeKey(dstKey) + t.Put(sk, PutInt64(num)) + + if err = t.Commit(); err != nil { + return 0, err + } + return num, nil +} + +func (db *DB) SClear(key []byte) (int64, error) { + if err := checkKeySize(key); err != nil { + return 0, err + } + + t := db.setBatch + t.Lock() + defer t.Unlock() + + num := db.sDelete(t, key) + db.rmExpire(t, SetType, key) + + err := t.Commit() + return num, err +} + +func (db *DB) SMclear(keys ...[]byte) (int64, error) { + t := db.setBatch + t.Lock() + defer t.Unlock() + + for _, key := range keys { + if err := checkKeySize(key); err != nil { + return 0, err + } + + db.sDelete(t, key) + db.rmExpire(t, SetType, key) + } + + err := t.Commit() + return int64(len(keys)), err +} + +func (db *DB) SExpire(key []byte, duration int64) (int64, error) { + if duration <= 0 { + return 0, errExpireValue + } + + return db.sExpireAt(key, time.Now().Unix()+duration) + +} + +func (db *DB) SExpireAt(key []byte, when int64) (int64, error) { + if when <= time.Now().Unix() { + return 0, errExpireValue + } + + return 
db.sExpireAt(key, when) + +} + +func (db *DB) STTL(key []byte) (int64, error) { + if err := checkKeySize(key); err != nil { + return -1, err + } + + return db.ttl(SetType, key) +} + +func (db *DB) SPersist(key []byte) (int64, error) { + if err := checkKeySize(key); err != nil { + return 0, err + } + + t := db.setBatch + t.Lock() + defer t.Unlock() + + n, err := db.rmExpire(t, SetType, key) + if err != nil { + return 0, err + } + err = t.Commit() + return n, err +} + +func (db *DB) SScan(key []byte, count int, inclusive bool, match string) ([][]byte, error) { + return db.scan(SSizeType, key, count, inclusive, match) +} diff --git a/vendor/github.com/lunny/nodb/t_ttl.go b/vendor/github.com/lunny/nodb/t_ttl.go new file mode 100644 index 0000000000..5c3638891c --- /dev/null +++ b/vendor/github.com/lunny/nodb/t_ttl.go @@ -0,0 +1,195 @@ +package nodb + +import ( + "encoding/binary" + "errors" + "time" + + "github.com/lunny/nodb/store" +) + +var ( + errExpMetaKey = errors.New("invalid expire meta key") + errExpTimeKey = errors.New("invalid expire time key") +) + +type retireCallback func(*batch, []byte) int64 + +type elimination struct { + db *DB + exp2Tx []*batch + exp2Retire []retireCallback +} + +var errExpType = errors.New("invalid expire type") + +func (db *DB) expEncodeTimeKey(dataType byte, key []byte, when int64) []byte { + buf := make([]byte, len(key)+11) + + buf[0] = db.index + buf[1] = ExpTimeType + buf[2] = dataType + pos := 3 + + binary.BigEndian.PutUint64(buf[pos:], uint64(when)) + pos += 8 + + copy(buf[pos:], key) + + return buf +} + +func (db *DB) expEncodeMetaKey(dataType byte, key []byte) []byte { + buf := make([]byte, len(key)+3) + + buf[0] = db.index + buf[1] = ExpMetaType + buf[2] = dataType + pos := 3 + + copy(buf[pos:], key) + + return buf +} + +func (db *DB) expDecodeMetaKey(mk []byte) (byte, []byte, error) { + if len(mk) <= 3 || mk[0] != db.index || mk[1] != ExpMetaType { + return 0, nil, errExpMetaKey + } + + return mk[2], mk[3:], nil +} + +func 
(db *DB) expDecodeTimeKey(tk []byte) (byte, []byte, int64, error) { + if len(tk) < 11 || tk[0] != db.index || tk[1] != ExpTimeType { + return 0, nil, 0, errExpTimeKey + } + + return tk[2], tk[11:], int64(binary.BigEndian.Uint64(tk[3:])), nil +} + +func (db *DB) expire(t *batch, dataType byte, key []byte, duration int64) { + db.expireAt(t, dataType, key, time.Now().Unix()+duration) +} + +func (db *DB) expireAt(t *batch, dataType byte, key []byte, when int64) { + mk := db.expEncodeMetaKey(dataType, key) + tk := db.expEncodeTimeKey(dataType, key, when) + + t.Put(tk, mk) + t.Put(mk, PutInt64(when)) +} + +func (db *DB) ttl(dataType byte, key []byte) (t int64, err error) { + mk := db.expEncodeMetaKey(dataType, key) + + if t, err = Int64(db.bucket.Get(mk)); err != nil || t == 0 { + t = -1 + } else { + t -= time.Now().Unix() + if t <= 0 { + t = -1 + } + // if t == -1 : to remove ???? + } + + return t, err +} + +func (db *DB) rmExpire(t *batch, dataType byte, key []byte) (int64, error) { + mk := db.expEncodeMetaKey(dataType, key) + if v, err := db.bucket.Get(mk); err != nil { + return 0, err + } else if v == nil { + return 0, nil + } else if when, err2 := Int64(v, nil); err2 != nil { + return 0, err2 + } else { + tk := db.expEncodeTimeKey(dataType, key, when) + t.Delete(mk) + t.Delete(tk) + return 1, nil + } +} + +func (db *DB) expFlush(t *batch, dataType byte) (err error) { + minKey := make([]byte, 3) + minKey[0] = db.index + minKey[1] = ExpTimeType + minKey[2] = dataType + + maxKey := make([]byte, 3) + maxKey[0] = db.index + maxKey[1] = ExpMetaType + maxKey[2] = dataType + 1 + + _, err = db.flushRegion(t, minKey, maxKey) + err = t.Commit() + return +} + +////////////////////////////////////////////////////////// +// +////////////////////////////////////////////////////////// + +func newEliminator(db *DB) *elimination { + eli := new(elimination) + eli.db = db + eli.exp2Tx = make([]*batch, maxDataType) + eli.exp2Retire = make([]retireCallback, maxDataType) + return eli +} + 
+func (eli *elimination) regRetireContext(dataType byte, t *batch, onRetire retireCallback) { + + // todo .. need to ensure exist - mapExpMetaType[expType] + + eli.exp2Tx[dataType] = t + eli.exp2Retire[dataType] = onRetire +} + +// call by outside ... (from *db to another *db) +func (eli *elimination) active() { + now := time.Now().Unix() + db := eli.db + dbGet := db.bucket.Get + + minKey := db.expEncodeTimeKey(NoneType, nil, 0) + maxKey := db.expEncodeTimeKey(maxDataType, nil, now) + + it := db.bucket.RangeLimitIterator(minKey, maxKey, store.RangeROpen, 0, -1) + for ; it.Valid(); it.Next() { + tk := it.RawKey() + mk := it.RawValue() + + dt, k, _, err := db.expDecodeTimeKey(tk) + if err != nil { + continue + } + + t := eli.exp2Tx[dt] + onRetire := eli.exp2Retire[dt] + if tk == nil || onRetire == nil { + continue + } + + t.Lock() + + if exp, err := Int64(dbGet(mk)); err == nil { + // check expire again + if exp <= now { + onRetire(t, k) + t.Delete(tk) + t.Delete(mk) + + t.Commit() + } + + } + + t.Unlock() + } + it.Close() + + return +} diff --git a/vendor/github.com/lunny/nodb/t_zset.go b/vendor/github.com/lunny/nodb/t_zset.go new file mode 100644 index 0000000000..d0ffb7ccf3 --- /dev/null +++ b/vendor/github.com/lunny/nodb/t_zset.go @@ -0,0 +1,943 @@ +package nodb + +import ( + "bytes" + "encoding/binary" + "errors" + "time" + + "github.com/lunny/nodb/store" +) + +const ( + MinScore int64 = -1<<63 + 1 + MaxScore int64 = 1<<63 - 1 + InvalidScore int64 = -1 << 63 + + AggregateSum byte = 0 + AggregateMin byte = 1 + AggregateMax byte = 2 +) + +type ScorePair struct { + Score int64 + Member []byte +} + +var errZSizeKey = errors.New("invalid zsize key") +var errZSetKey = errors.New("invalid zset key") +var errZScoreKey = errors.New("invalid zscore key") +var errScoreOverflow = errors.New("zset score overflow") +var errInvalidAggregate = errors.New("invalid aggregate") +var errInvalidWeightNum = errors.New("invalid weight number") +var errInvalidSrcKeyNum = 
errors.New("invalid src key number") + +const ( + zsetNScoreSep byte = '<' + zsetPScoreSep byte = zsetNScoreSep + 1 + zsetStopScoreSep byte = zsetPScoreSep + 1 + + zsetStartMemSep byte = ':' + zsetStopMemSep byte = zsetStartMemSep + 1 +) + +func checkZSetKMSize(key []byte, member []byte) error { + if len(key) > MaxKeySize || len(key) == 0 { + return errKeySize + } else if len(member) > MaxZSetMemberSize || len(member) == 0 { + return errZSetMemberSize + } + return nil +} + +func (db *DB) zEncodeSizeKey(key []byte) []byte { + buf := make([]byte, len(key)+2) + buf[0] = db.index + buf[1] = ZSizeType + + copy(buf[2:], key) + return buf +} + +func (db *DB) zDecodeSizeKey(ek []byte) ([]byte, error) { + if len(ek) < 2 || ek[0] != db.index || ek[1] != ZSizeType { + return nil, errZSizeKey + } + + return ek[2:], nil +} + +func (db *DB) zEncodeSetKey(key []byte, member []byte) []byte { + buf := make([]byte, len(key)+len(member)+5) + + pos := 0 + buf[pos] = db.index + pos++ + + buf[pos] = ZSetType + pos++ + + binary.BigEndian.PutUint16(buf[pos:], uint16(len(key))) + pos += 2 + + copy(buf[pos:], key) + pos += len(key) + + buf[pos] = zsetStartMemSep + pos++ + + copy(buf[pos:], member) + + return buf +} + +func (db *DB) zDecodeSetKey(ek []byte) ([]byte, []byte, error) { + if len(ek) < 5 || ek[0] != db.index || ek[1] != ZSetType { + return nil, nil, errZSetKey + } + + keyLen := int(binary.BigEndian.Uint16(ek[2:])) + if keyLen+5 > len(ek) { + return nil, nil, errZSetKey + } + + key := ek[4 : 4+keyLen] + + if ek[4+keyLen] != zsetStartMemSep { + return nil, nil, errZSetKey + } + + member := ek[5+keyLen:] + return key, member, nil +} + +func (db *DB) zEncodeStartSetKey(key []byte) []byte { + k := db.zEncodeSetKey(key, nil) + return k +} + +func (db *DB) zEncodeStopSetKey(key []byte) []byte { + k := db.zEncodeSetKey(key, nil) + k[len(k)-1] = zsetStartMemSep + 1 + return k +} + +func (db *DB) zEncodeScoreKey(key []byte, member []byte, score int64) []byte { + buf := make([]byte, 
len(key)+len(member)+14) + + pos := 0 + buf[pos] = db.index + pos++ + + buf[pos] = ZScoreType + pos++ + + binary.BigEndian.PutUint16(buf[pos:], uint16(len(key))) + pos += 2 + + copy(buf[pos:], key) + pos += len(key) + + if score < 0 { + buf[pos] = zsetNScoreSep + } else { + buf[pos] = zsetPScoreSep + } + + pos++ + binary.BigEndian.PutUint64(buf[pos:], uint64(score)) + pos += 8 + + buf[pos] = zsetStartMemSep + pos++ + + copy(buf[pos:], member) + return buf +} + +func (db *DB) zEncodeStartScoreKey(key []byte, score int64) []byte { + return db.zEncodeScoreKey(key, nil, score) +} + +func (db *DB) zEncodeStopScoreKey(key []byte, score int64) []byte { + k := db.zEncodeScoreKey(key, nil, score) + k[len(k)-1] = zsetStopMemSep + return k +} + +func (db *DB) zDecodeScoreKey(ek []byte) (key []byte, member []byte, score int64, err error) { + if len(ek) < 14 || ek[0] != db.index || ek[1] != ZScoreType { + err = errZScoreKey + return + } + + keyLen := int(binary.BigEndian.Uint16(ek[2:])) + if keyLen+14 > len(ek) { + err = errZScoreKey + return + } + + key = ek[4 : 4+keyLen] + pos := 4 + keyLen + + if (ek[pos] != zsetNScoreSep) && (ek[pos] != zsetPScoreSep) { + err = errZScoreKey + return + } + pos++ + + score = int64(binary.BigEndian.Uint64(ek[pos:])) + pos += 8 + + if ek[pos] != zsetStartMemSep { + err = errZScoreKey + return + } + + pos++ + + member = ek[pos:] + return +} + +func (db *DB) zSetItem(t *batch, key []byte, score int64, member []byte) (int64, error) { + if score <= MinScore || score >= MaxScore { + return 0, errScoreOverflow + } + + var exists int64 = 0 + ek := db.zEncodeSetKey(key, member) + + if v, err := db.bucket.Get(ek); err != nil { + return 0, err + } else if v != nil { + exists = 1 + + if s, err := Int64(v, err); err != nil { + return 0, err + } else { + sk := db.zEncodeScoreKey(key, member, s) + t.Delete(sk) + } + } + + t.Put(ek, PutInt64(score)) + + sk := db.zEncodeScoreKey(key, member, score) + t.Put(sk, []byte{}) + + return exists, nil +} + +func (db 
*DB) zDelItem(t *batch, key []byte, member []byte, skipDelScore bool) (int64, error) { + ek := db.zEncodeSetKey(key, member) + if v, err := db.bucket.Get(ek); err != nil { + return 0, err + } else if v == nil { + //not exists + return 0, nil + } else { + //exists + if !skipDelScore { + //we must del score + if s, err := Int64(v, err); err != nil { + return 0, err + } else { + sk := db.zEncodeScoreKey(key, member, s) + t.Delete(sk) + } + } + } + + t.Delete(ek) + + return 1, nil +} + +func (db *DB) zDelete(t *batch, key []byte) int64 { + delMembCnt, _ := db.zRemRange(t, key, MinScore, MaxScore, 0, -1) + // todo : log err + return delMembCnt +} + +func (db *DB) zExpireAt(key []byte, when int64) (int64, error) { + t := db.zsetBatch + t.Lock() + defer t.Unlock() + + if zcnt, err := db.ZCard(key); err != nil || zcnt == 0 { + return 0, err + } else { + db.expireAt(t, ZSetType, key, when) + if err := t.Commit(); err != nil { + return 0, err + } + } + return 1, nil +} + +func (db *DB) ZAdd(key []byte, args ...ScorePair) (int64, error) { + if len(args) == 0 { + return 0, nil + } + + t := db.zsetBatch + t.Lock() + defer t.Unlock() + + var num int64 = 0 + for i := 0; i < len(args); i++ { + score := args[i].Score + member := args[i].Member + + if err := checkZSetKMSize(key, member); err != nil { + return 0, err + } + + if n, err := db.zSetItem(t, key, score, member); err != nil { + return 0, err + } else if n == 0 { + //add new + num++ + } + } + + if _, err := db.zIncrSize(t, key, num); err != nil { + return 0, err + } + + //todo add binlog + err := t.Commit() + return num, err +} + +func (db *DB) zIncrSize(t *batch, key []byte, delta int64) (int64, error) { + sk := db.zEncodeSizeKey(key) + + size, err := Int64(db.bucket.Get(sk)) + if err != nil { + return 0, err + } else { + size += delta + if size <= 0 { + size = 0 + t.Delete(sk) + db.rmExpire(t, ZSetType, key) + } else { + t.Put(sk, PutInt64(size)) + } + } + + return size, nil +} + +func (db *DB) ZCard(key []byte) (int64, 
error) { + if err := checkKeySize(key); err != nil { + return 0, err + } + + sk := db.zEncodeSizeKey(key) + return Int64(db.bucket.Get(sk)) +} + +func (db *DB) ZScore(key []byte, member []byte) (int64, error) { + if err := checkZSetKMSize(key, member); err != nil { + return InvalidScore, err + } + + var score int64 = InvalidScore + + k := db.zEncodeSetKey(key, member) + if v, err := db.bucket.Get(k); err != nil { + return InvalidScore, err + } else if v == nil { + return InvalidScore, ErrScoreMiss + } else { + if score, err = Int64(v, nil); err != nil { + return InvalidScore, err + } + } + + return score, nil +} + +func (db *DB) ZRem(key []byte, members ...[]byte) (int64, error) { + if len(members) == 0 { + return 0, nil + } + + t := db.zsetBatch + t.Lock() + defer t.Unlock() + + var num int64 = 0 + for i := 0; i < len(members); i++ { + if err := checkZSetKMSize(key, members[i]); err != nil { + return 0, err + } + + if n, err := db.zDelItem(t, key, members[i], false); err != nil { + return 0, err + } else if n == 1 { + num++ + } + } + + if _, err := db.zIncrSize(t, key, -num); err != nil { + return 0, err + } + + err := t.Commit() + return num, err +} + +func (db *DB) ZIncrBy(key []byte, delta int64, member []byte) (int64, error) { + if err := checkZSetKMSize(key, member); err != nil { + return InvalidScore, err + } + + t := db.zsetBatch + t.Lock() + defer t.Unlock() + + ek := db.zEncodeSetKey(key, member) + + var oldScore int64 = 0 + v, err := db.bucket.Get(ek) + if err != nil { + return InvalidScore, err + } else if v == nil { + db.zIncrSize(t, key, 1) + } else { + if oldScore, err = Int64(v, err); err != nil { + return InvalidScore, err + } + } + + newScore := oldScore + delta + if newScore >= MaxScore || newScore <= MinScore { + return InvalidScore, errScoreOverflow + } + + sk := db.zEncodeScoreKey(key, member, newScore) + t.Put(sk, []byte{}) + t.Put(ek, PutInt64(newScore)) + + if v != nil { + // so as to update score, we must delete the old one + oldSk := 
db.zEncodeScoreKey(key, member, oldScore) + t.Delete(oldSk) + } + + err = t.Commit() + return newScore, err +} + +func (db *DB) ZCount(key []byte, min int64, max int64) (int64, error) { + if err := checkKeySize(key); err != nil { + return 0, err + } + minKey := db.zEncodeStartScoreKey(key, min) + maxKey := db.zEncodeStopScoreKey(key, max) + + rangeType := store.RangeROpen + + it := db.bucket.RangeLimitIterator(minKey, maxKey, rangeType, 0, -1) + var n int64 = 0 + for ; it.Valid(); it.Next() { + n++ + } + it.Close() + + return n, nil +} + +func (db *DB) zrank(key []byte, member []byte, reverse bool) (int64, error) { + if err := checkZSetKMSize(key, member); err != nil { + return 0, err + } + + k := db.zEncodeSetKey(key, member) + + it := db.bucket.NewIterator() + defer it.Close() + + if v := it.Find(k); v == nil { + return -1, nil + } else { + if s, err := Int64(v, nil); err != nil { + return 0, err + } else { + var rit *store.RangeLimitIterator + + sk := db.zEncodeScoreKey(key, member, s) + + if !reverse { + minKey := db.zEncodeStartScoreKey(key, MinScore) + + rit = store.NewRangeIterator(it, &store.Range{minKey, sk, store.RangeClose}) + } else { + maxKey := db.zEncodeStopScoreKey(key, MaxScore) + rit = store.NewRevRangeIterator(it, &store.Range{sk, maxKey, store.RangeClose}) + } + + var lastKey []byte = nil + var n int64 = 0 + + for ; rit.Valid(); rit.Next() { + n++ + + lastKey = rit.BufKey(lastKey) + } + + if _, m, _, err := db.zDecodeScoreKey(lastKey); err == nil && bytes.Equal(m, member) { + n-- + return n, nil + } + } + } + + return -1, nil +} + +func (db *DB) zIterator(key []byte, min int64, max int64, offset int, count int, reverse bool) *store.RangeLimitIterator { + minKey := db.zEncodeStartScoreKey(key, min) + maxKey := db.zEncodeStopScoreKey(key, max) + + if !reverse { + return db.bucket.RangeLimitIterator(minKey, maxKey, store.RangeClose, offset, count) + } else { + return db.bucket.RevRangeLimitIterator(minKey, maxKey, store.RangeClose, offset, count) + 
} +} + +func (db *DB) zRemRange(t *batch, key []byte, min int64, max int64, offset int, count int) (int64, error) { + if len(key) > MaxKeySize { + return 0, errKeySize + } + + it := db.zIterator(key, min, max, offset, count, false) + var num int64 = 0 + for ; it.Valid(); it.Next() { + sk := it.RawKey() + _, m, _, err := db.zDecodeScoreKey(sk) + if err != nil { + continue + } + + if n, err := db.zDelItem(t, key, m, true); err != nil { + return 0, err + } else if n == 1 { + num++ + } + + t.Delete(sk) + } + it.Close() + + if _, err := db.zIncrSize(t, key, -num); err != nil { + return 0, err + } + + return num, nil +} + +func (db *DB) zRange(key []byte, min int64, max int64, offset int, count int, reverse bool) ([]ScorePair, error) { + if len(key) > MaxKeySize { + return nil, errKeySize + } + + if offset < 0 { + return []ScorePair{}, nil + } + + nv := 64 + if count > 0 { + nv = count + } + + v := make([]ScorePair, 0, nv) + + var it *store.RangeLimitIterator + + //if reverse and offset is 0, count < 0, we may use forward iterator then reverse + //because store iterator prev is slower than next + if !reverse || (offset == 0 && count < 0) { + it = db.zIterator(key, min, max, offset, count, false) + } else { + it = db.zIterator(key, min, max, offset, count, true) + } + + for ; it.Valid(); it.Next() { + _, m, s, err := db.zDecodeScoreKey(it.Key()) + //may be we will check key equal? 
+ if err != nil { + continue + } + + v = append(v, ScorePair{Member: m, Score: s}) + } + it.Close() + + if reverse && (offset == 0 && count < 0) { + for i, j := 0, len(v)-1; i < j; i, j = i+1, j-1 { + v[i], v[j] = v[j], v[i] + } + } + + return v, nil +} + +func (db *DB) zParseLimit(key []byte, start int, stop int) (offset int, count int, err error) { + if start < 0 || stop < 0 { + //refer redis implementation + var size int64 + size, err = db.ZCard(key) + if err != nil { + return + } + + llen := int(size) + + if start < 0 { + start = llen + start + } + if stop < 0 { + stop = llen + stop + } + + if start < 0 { + start = 0 + } + + if start >= llen { + offset = -1 + return + } + } + + if start > stop { + offset = -1 + return + } + + offset = start + count = (stop - start) + 1 + return +} + +func (db *DB) ZClear(key []byte) (int64, error) { + t := db.zsetBatch + t.Lock() + defer t.Unlock() + + rmCnt, err := db.zRemRange(t, key, MinScore, MaxScore, 0, -1) + if err == nil { + err = t.Commit() + } + + return rmCnt, err +} + +func (db *DB) ZMclear(keys ...[]byte) (int64, error) { + t := db.zsetBatch + t.Lock() + defer t.Unlock() + + for _, key := range keys { + if _, err := db.zRemRange(t, key, MinScore, MaxScore, 0, -1); err != nil { + return 0, err + } + } + + err := t.Commit() + + return int64(len(keys)), err +} + +func (db *DB) ZRange(key []byte, start int, stop int) ([]ScorePair, error) { + return db.ZRangeGeneric(key, start, stop, false) +} + +//min and max must be inclusive +//if no limit, set offset = 0 and count = -1 +func (db *DB) ZRangeByScore(key []byte, min int64, max int64, + offset int, count int) ([]ScorePair, error) { + return db.ZRangeByScoreGeneric(key, min, max, offset, count, false) +} + +func (db *DB) ZRank(key []byte, member []byte) (int64, error) { + return db.zrank(key, member, false) +} + +func (db *DB) ZRemRangeByRank(key []byte, start int, stop int) (int64, error) { + offset, count, err := db.zParseLimit(key, start, stop) + if err != nil { + 
return 0, err + } + + var rmCnt int64 + + t := db.zsetBatch + t.Lock() + defer t.Unlock() + + rmCnt, err = db.zRemRange(t, key, MinScore, MaxScore, offset, count) + if err == nil { + err = t.Commit() + } + + return rmCnt, err +} + +//min and max must be inclusive +func (db *DB) ZRemRangeByScore(key []byte, min int64, max int64) (int64, error) { + t := db.zsetBatch + t.Lock() + defer t.Unlock() + + rmCnt, err := db.zRemRange(t, key, min, max, 0, -1) + if err == nil { + err = t.Commit() + } + + return rmCnt, err +} + +func (db *DB) ZRevRange(key []byte, start int, stop int) ([]ScorePair, error) { + return db.ZRangeGeneric(key, start, stop, true) +} + +func (db *DB) ZRevRank(key []byte, member []byte) (int64, error) { + return db.zrank(key, member, true) +} + +//min and max must be inclusive +//if no limit, set offset = 0 and count = -1 +func (db *DB) ZRevRangeByScore(key []byte, min int64, max int64, offset int, count int) ([]ScorePair, error) { + return db.ZRangeByScoreGeneric(key, min, max, offset, count, true) +} + +func (db *DB) ZRangeGeneric(key []byte, start int, stop int, reverse bool) ([]ScorePair, error) { + offset, count, err := db.zParseLimit(key, start, stop) + if err != nil { + return nil, err + } + + return db.zRange(key, MinScore, MaxScore, offset, count, reverse) +} + +//min and max must be inclusive +//if no limit, set offset = 0 and count = -1 +func (db *DB) ZRangeByScoreGeneric(key []byte, min int64, max int64, + offset int, count int, reverse bool) ([]ScorePair, error) { + + return db.zRange(key, min, max, offset, count, reverse) +} + +func (db *DB) zFlush() (drop int64, err error) { + t := db.zsetBatch + t.Lock() + defer t.Unlock() + return db.flushType(t, ZSetType) +} + +func (db *DB) ZExpire(key []byte, duration int64) (int64, error) { + if duration <= 0 { + return 0, errExpireValue + } + + return db.zExpireAt(key, time.Now().Unix()+duration) +} + +func (db *DB) ZExpireAt(key []byte, when int64) (int64, error) { + if when <= time.Now().Unix() { 
+ return 0, errExpireValue + } + + return db.zExpireAt(key, when) +} + +func (db *DB) ZTTL(key []byte) (int64, error) { + if err := checkKeySize(key); err != nil { + return -1, err + } + + return db.ttl(ZSetType, key) +} + +func (db *DB) ZPersist(key []byte) (int64, error) { + if err := checkKeySize(key); err != nil { + return 0, err + } + + t := db.zsetBatch + t.Lock() + defer t.Unlock() + + n, err := db.rmExpire(t, ZSetType, key) + if err != nil { + return 0, err + } + + err = t.Commit() + return n, err +} + +func getAggregateFunc(aggregate byte) func(int64, int64) int64 { + switch aggregate { + case AggregateSum: + return func(a int64, b int64) int64 { + return a + b + } + case AggregateMax: + return func(a int64, b int64) int64 { + if a > b { + return a + } + return b + } + case AggregateMin: + return func(a int64, b int64) int64 { + if a > b { + return b + } + return a + } + } + return nil +} + +func (db *DB) ZUnionStore(destKey []byte, srcKeys [][]byte, weights []int64, aggregate byte) (int64, error) { + + var destMap = map[string]int64{} + aggregateFunc := getAggregateFunc(aggregate) + if aggregateFunc == nil { + return 0, errInvalidAggregate + } + if len(srcKeys) < 1 { + return 0, errInvalidSrcKeyNum + } + if weights != nil { + if len(srcKeys) != len(weights) { + return 0, errInvalidWeightNum + } + } else { + weights = make([]int64, len(srcKeys)) + for i := 0; i < len(weights); i++ { + weights[i] = 1 + } + } + + for i, key := range srcKeys { + scorePairs, err := db.ZRange(key, 0, -1) + if err != nil { + return 0, err + } + for _, pair := range scorePairs { + if score, ok := destMap[String(pair.Member)]; !ok { + destMap[String(pair.Member)] = pair.Score * weights[i] + } else { + destMap[String(pair.Member)] = aggregateFunc(score, pair.Score*weights[i]) + } + } + } + + t := db.zsetBatch + t.Lock() + defer t.Unlock() + + db.zDelete(t, destKey) + + for member, score := range destMap { + if err := checkZSetKMSize(destKey, []byte(member)); err != nil { + return 
0, err + } + + if _, err := db.zSetItem(t, destKey, score, []byte(member)); err != nil { + return 0, err + } + } + + var num = int64(len(destMap)) + sk := db.zEncodeSizeKey(destKey) + t.Put(sk, PutInt64(num)) + + //todo add binlog + if err := t.Commit(); err != nil { + return 0, err + } + return num, nil +} + +func (db *DB) ZInterStore(destKey []byte, srcKeys [][]byte, weights []int64, aggregate byte) (int64, error) { + + aggregateFunc := getAggregateFunc(aggregate) + if aggregateFunc == nil { + return 0, errInvalidAggregate + } + if len(srcKeys) < 1 { + return 0, errInvalidSrcKeyNum + } + if weights != nil { + if len(srcKeys) != len(weights) { + return 0, errInvalidWeightNum + } + } else { + weights = make([]int64, len(srcKeys)) + for i := 0; i < len(weights); i++ { + weights[i] = 1 + } + } + + var destMap = map[string]int64{} + scorePairs, err := db.ZRange(srcKeys[0], 0, -1) + if err != nil { + return 0, err + } + for _, pair := range scorePairs { + destMap[String(pair.Member)] = pair.Score * weights[0] + } + + for i, key := range srcKeys[1:] { + scorePairs, err := db.ZRange(key, 0, -1) + if err != nil { + return 0, err + } + tmpMap := map[string]int64{} + for _, pair := range scorePairs { + if score, ok := destMap[String(pair.Member)]; ok { + tmpMap[String(pair.Member)] = aggregateFunc(score, pair.Score*weights[i+1]) + } + } + destMap = tmpMap + } + + t := db.zsetBatch + t.Lock() + defer t.Unlock() + + db.zDelete(t, destKey) + + for member, score := range destMap { + if err := checkZSetKMSize(destKey, []byte(member)); err != nil { + return 0, err + } + if _, err := db.zSetItem(t, destKey, score, []byte(member)); err != nil { + return 0, err + } + } + + var num int64 = int64(len(destMap)) + sk := db.zEncodeSizeKey(destKey) + t.Put(sk, PutInt64(num)) + //todo add binlog + if err := t.Commit(); err != nil { + return 0, err + } + return num, nil +} + +func (db *DB) ZScan(key []byte, count int, inclusive bool, match string) ([][]byte, error) { + return 
db.scan(ZSizeType, key, count, inclusive, match) +} diff --git a/vendor/github.com/lunny/nodb/tx.go b/vendor/github.com/lunny/nodb/tx.go new file mode 100644 index 0000000000..5ce99db57a --- /dev/null +++ b/vendor/github.com/lunny/nodb/tx.go @@ -0,0 +1,113 @@ +package nodb + +import ( + "errors" + "fmt" + + "github.com/lunny/nodb/store" +) + +var ( + ErrNestTx = errors.New("nest transaction not supported") + ErrTxDone = errors.New("Transaction has already been committed or rolled back") +) + +type Tx struct { + *DB + + tx *store.Tx + + logs [][]byte +} + +func (db *DB) IsTransaction() bool { + return db.status == DBInTransaction +} + +// Begin a transaction, it will block all other write operations before calling Commit or Rollback. +// You must be very careful to prevent long-time transaction. +func (db *DB) Begin() (*Tx, error) { + if db.IsTransaction() { + return nil, ErrNestTx + } + + tx := new(Tx) + + tx.DB = new(DB) + tx.DB.l = db.l + + tx.l.wLock.Lock() + + tx.DB.sdb = db.sdb + + var err error + tx.tx, err = db.sdb.Begin() + if err != nil { + tx.l.wLock.Unlock() + return nil, err + } + + tx.DB.bucket = tx.tx + + tx.DB.status = DBInTransaction + + tx.DB.index = db.index + + tx.DB.kvBatch = tx.newBatch() + tx.DB.listBatch = tx.newBatch() + tx.DB.hashBatch = tx.newBatch() + tx.DB.zsetBatch = tx.newBatch() + tx.DB.binBatch = tx.newBatch() + tx.DB.setBatch = tx.newBatch() + + return tx, nil +} + +func (tx *Tx) Commit() error { + if tx.tx == nil { + return ErrTxDone + } + + tx.l.commitLock.Lock() + err := tx.tx.Commit() + tx.tx = nil + + if len(tx.logs) > 0 { + tx.l.binlog.Log(tx.logs...) 
+ } + + tx.l.commitLock.Unlock() + + tx.l.wLock.Unlock() + + tx.DB.bucket = nil + + return err +} + +func (tx *Tx) Rollback() error { + if tx.tx == nil { + return ErrTxDone + } + + err := tx.tx.Rollback() + tx.tx = nil + + tx.l.wLock.Unlock() + tx.DB.bucket = nil + + return err +} + +func (tx *Tx) newBatch() *batch { + return tx.l.newBatch(tx.tx.NewWriteBatch(), &txBatchLocker{}, tx) +} + +func (tx *Tx) Select(index int) error { + if index < 0 || index >= int(MaxDBNumber) { + return fmt.Errorf("invalid db index %d", index) + } + + tx.DB.index = uint8(index) + return nil +} diff --git a/vendor/github.com/lunny/nodb/util.go b/vendor/github.com/lunny/nodb/util.go new file mode 100644 index 0000000000..d5949a96e6 --- /dev/null +++ b/vendor/github.com/lunny/nodb/util.go @@ -0,0 +1,113 @@ +package nodb + +import ( + "encoding/binary" + "errors" + "reflect" + "strconv" + "unsafe" +) + +var errIntNumber = errors.New("invalid integer") + +// no copy to change slice to string +// use your own risk +func String(b []byte) (s string) { + pbytes := (*reflect.SliceHeader)(unsafe.Pointer(&b)) + pstring := (*reflect.StringHeader)(unsafe.Pointer(&s)) + pstring.Data = pbytes.Data + pstring.Len = pbytes.Len + return +} + +// no copy to change string to slice +// use your own risk +func Slice(s string) (b []byte) { + pbytes := (*reflect.SliceHeader)(unsafe.Pointer(&b)) + pstring := (*reflect.StringHeader)(unsafe.Pointer(&s)) + pbytes.Data = pstring.Data + pbytes.Len = pstring.Len + pbytes.Cap = pstring.Len + return +} + +func Int64(v []byte, err error) (int64, error) { + if err != nil { + return 0, err + } else if v == nil || len(v) == 0 { + return 0, nil + } else if len(v) != 8 { + return 0, errIntNumber + } + + return int64(binary.LittleEndian.Uint64(v)), nil +} + +func PutInt64(v int64) []byte { + var b []byte + pbytes := (*reflect.SliceHeader)(unsafe.Pointer(&b)) + pbytes.Data = uintptr(unsafe.Pointer(&v)) + pbytes.Len = 8 + pbytes.Cap = 8 + return b +} + +func StrInt64(v []byte, 
err error) (int64, error) { + if err != nil { + return 0, err + } else if v == nil { + return 0, nil + } else { + return strconv.ParseInt(String(v), 10, 64) + } +} + +func StrInt32(v []byte, err error) (int32, error) { + if err != nil { + return 0, err + } else if v == nil { + return 0, nil + } else { + res, err := strconv.ParseInt(String(v), 10, 32) + return int32(res), err + } +} + +func StrInt8(v []byte, err error) (int8, error) { + if err != nil { + return 0, err + } else if v == nil { + return 0, nil + } else { + res, err := strconv.ParseInt(String(v), 10, 8) + return int8(res), err + } +} + +func StrPutInt64(v int64) []byte { + return strconv.AppendInt(nil, v, 10) +} + +func MinUInt32(a uint32, b uint32) uint32 { + if a > b { + return b + } else { + return a + } +} + +func MaxUInt32(a uint32, b uint32) uint32 { + if a > b { + return a + } else { + return b + } +} + +func MaxInt32(a int32, b int32) int32 { + if a > b { + return a + } else { + return b + } +} diff --git a/vendor/github.com/pkg/errors/LICENSE b/vendor/github.com/pkg/errors/LICENSE new file mode 100644 index 0000000000..835ba3e755 --- /dev/null +++ b/vendor/github.com/pkg/errors/LICENSE @@ -0,0 +1,23 @@ +Copyright (c) 2015, Dave Cheney <dave@cheney.net> +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/github.com/pkg/errors/errors.go b/vendor/github.com/pkg/errors/errors.go new file mode 100644 index 0000000000..7421f326ff --- /dev/null +++ b/vendor/github.com/pkg/errors/errors.go @@ -0,0 +1,282 @@ +// Package errors provides simple error handling primitives. +// +// The traditional error handling idiom in Go is roughly akin to +// +// if err != nil { +// return err +// } +// +// which when applied recursively up the call stack results in error reports +// without context or debugging information. The errors package allows +// programmers to add context to the failure path in their code in a way +// that does not destroy the original value of the error. +// +// Adding context to an error +// +// The errors.Wrap function returns a new error that adds context to the +// original error by recording a stack trace at the point Wrap is called, +// together with the supplied message. 
For example +// +// _, err := ioutil.ReadAll(r) +// if err != nil { +// return errors.Wrap(err, "read failed") +// } +// +// If additional control is required, the errors.WithStack and +// errors.WithMessage functions destructure errors.Wrap into its component +// operations: annotating an error with a stack trace and with a message, +// respectively. +// +// Retrieving the cause of an error +// +// Using errors.Wrap constructs a stack of errors, adding context to the +// preceding error. Depending on the nature of the error it may be necessary +// to reverse the operation of errors.Wrap to retrieve the original error +// for inspection. Any error value which implements this interface +// +// type causer interface { +// Cause() error +// } +// +// can be inspected by errors.Cause. errors.Cause will recursively retrieve +// the topmost error that does not implement causer, which is assumed to be +// the original cause. For example: +// +// switch err := errors.Cause(err).(type) { +// case *MyError: +// // handle specifically +// default: +// // unknown error +// } +// +// Although the causer interface is not exported by this package, it is +// considered a part of its stable public interface. +// +// Formatted printing of errors +// +// All error values returned from this package implement fmt.Formatter and can +// be formatted by the fmt package. The following verbs are supported: +// +// %s print the error. If the error has a Cause it will be +// printed recursively. +// %v see %s +// %+v extended format. Each Frame of the error's StackTrace will +// be printed in detail. +// +// Retrieving the stack trace of an error or wrapper +// +// New, Errorf, Wrap, and Wrapf record a stack trace at the point they are +// invoked. 
This information can be retrieved with the following interface: +// +// type stackTracer interface { +// StackTrace() errors.StackTrace +// } +// +// The returned errors.StackTrace type is defined as +// +// type StackTrace []Frame +// +// The Frame type represents a call site in the stack trace. Frame supports +// the fmt.Formatter interface that can be used for printing information about +// the stack trace of this error. For example: +// +// if err, ok := err.(stackTracer); ok { +// for _, f := range err.StackTrace() { +// fmt.Printf("%+s:%d", f) +// } +// } +// +// Although the stackTracer interface is not exported by this package, it is +// considered a part of its stable public interface. +// +// See the documentation for Frame.Format for more details. +package errors + +import ( + "fmt" + "io" +) + +// New returns an error with the supplied message. +// New also records the stack trace at the point it was called. +func New(message string) error { + return &fundamental{ + msg: message, + stack: callers(), + } +} + +// Errorf formats according to a format specifier and returns the string +// as a value that satisfies error. +// Errorf also records the stack trace at the point it was called. +func Errorf(format string, args ...interface{}) error { + return &fundamental{ + msg: fmt.Sprintf(format, args...), + stack: callers(), + } +} + +// fundamental is an error that has a message and a stack, but no caller. +type fundamental struct { + msg string + *stack +} + +func (f *fundamental) Error() string { return f.msg } + +func (f *fundamental) Format(s fmt.State, verb rune) { + switch verb { + case 'v': + if s.Flag('+') { + io.WriteString(s, f.msg) + f.stack.Format(s, verb) + return + } + fallthrough + case 's': + io.WriteString(s, f.msg) + case 'q': + fmt.Fprintf(s, "%q", f.msg) + } +} + +// WithStack annotates err with a stack trace at the point WithStack was called. +// If err is nil, WithStack returns nil. 
+func WithStack(err error) error { + if err == nil { + return nil + } + return &withStack{ + err, + callers(), + } +} + +type withStack struct { + error + *stack +} + +func (w *withStack) Cause() error { return w.error } + +func (w *withStack) Format(s fmt.State, verb rune) { + switch verb { + case 'v': + if s.Flag('+') { + fmt.Fprintf(s, "%+v", w.Cause()) + w.stack.Format(s, verb) + return + } + fallthrough + case 's': + io.WriteString(s, w.Error()) + case 'q': + fmt.Fprintf(s, "%q", w.Error()) + } +} + +// Wrap returns an error annotating err with a stack trace +// at the point Wrap is called, and the supplied message. +// If err is nil, Wrap returns nil. +func Wrap(err error, message string) error { + if err == nil { + return nil + } + err = &withMessage{ + cause: err, + msg: message, + } + return &withStack{ + err, + callers(), + } +} + +// Wrapf returns an error annotating err with a stack trace +// at the point Wrapf is called, and the format specifier. +// If err is nil, Wrapf returns nil. +func Wrapf(err error, format string, args ...interface{}) error { + if err == nil { + return nil + } + err = &withMessage{ + cause: err, + msg: fmt.Sprintf(format, args...), + } + return &withStack{ + err, + callers(), + } +} + +// WithMessage annotates err with a new message. +// If err is nil, WithMessage returns nil. +func WithMessage(err error, message string) error { + if err == nil { + return nil + } + return &withMessage{ + cause: err, + msg: message, + } +} + +// WithMessagef annotates err with the format specifier. +// If err is nil, WithMessagef returns nil. 
+func WithMessagef(err error, format string, args ...interface{}) error { + if err == nil { + return nil + } + return &withMessage{ + cause: err, + msg: fmt.Sprintf(format, args...), + } +} + +type withMessage struct { + cause error + msg string +} + +func (w *withMessage) Error() string { return w.msg + ": " + w.cause.Error() } +func (w *withMessage) Cause() error { return w.cause } + +func (w *withMessage) Format(s fmt.State, verb rune) { + switch verb { + case 'v': + if s.Flag('+') { + fmt.Fprintf(s, "%+v\n", w.Cause()) + io.WriteString(s, w.msg) + return + } + fallthrough + case 's', 'q': + io.WriteString(s, w.Error()) + } +} + +// Cause returns the underlying cause of the error, if possible. +// An error value has a cause if it implements the following +// interface: +// +// type causer interface { +// Cause() error +// } +// +// If the error does not implement Cause, the original error will +// be returned. If the error is nil, nil will be returned without further +// investigation. +func Cause(err error) error { + type causer interface { + Cause() error + } + + for err != nil { + cause, ok := err.(causer) + if !ok { + break + } + err = cause.Cause() + } + return err +} diff --git a/vendor/github.com/pkg/errors/stack.go b/vendor/github.com/pkg/errors/stack.go new file mode 100644 index 0000000000..2874a048cf --- /dev/null +++ b/vendor/github.com/pkg/errors/stack.go @@ -0,0 +1,147 @@ +package errors + +import ( + "fmt" + "io" + "path" + "runtime" + "strings" +) + +// Frame represents a program counter inside a stack frame. +type Frame uintptr + +// pc returns the program counter for this frame; +// multiple frames may have the same PC value. +func (f Frame) pc() uintptr { return uintptr(f) - 1 } + +// file returns the full path to the file that contains the +// function for this Frame's pc. 
+func (f Frame) file() string { + fn := runtime.FuncForPC(f.pc()) + if fn == nil { + return "unknown" + } + file, _ := fn.FileLine(f.pc()) + return file +} + +// line returns the line number of source code of the +// function for this Frame's pc. +func (f Frame) line() int { + fn := runtime.FuncForPC(f.pc()) + if fn == nil { + return 0 + } + _, line := fn.FileLine(f.pc()) + return line +} + +// Format formats the frame according to the fmt.Formatter interface. +// +// %s source file +// %d source line +// %n function name +// %v equivalent to %s:%d +// +// Format accepts flags that alter the printing of some verbs, as follows: +// +// %+s function name and path of source file relative to the compile time +// GOPATH separated by \n\t (<funcname>\n\t<path>) +// %+v equivalent to %+s:%d +func (f Frame) Format(s fmt.State, verb rune) { + switch verb { + case 's': + switch { + case s.Flag('+'): + pc := f.pc() + fn := runtime.FuncForPC(pc) + if fn == nil { + io.WriteString(s, "unknown") + } else { + file, _ := fn.FileLine(pc) + fmt.Fprintf(s, "%s\n\t%s", fn.Name(), file) + } + default: + io.WriteString(s, path.Base(f.file())) + } + case 'd': + fmt.Fprintf(s, "%d", f.line()) + case 'n': + name := runtime.FuncForPC(f.pc()).Name() + io.WriteString(s, funcname(name)) + case 'v': + f.Format(s, 's') + io.WriteString(s, ":") + f.Format(s, 'd') + } +} + +// StackTrace is stack of Frames from innermost (newest) to outermost (oldest). +type StackTrace []Frame + +// Format formats the stack of Frames according to the fmt.Formatter interface. +// +// %s lists source files for each Frame in the stack +// %v lists the source file and line number for each Frame in the stack +// +// Format accepts flags that alter the printing of some verbs, as follows: +// +// %+v Prints filename, function, and line number for each Frame in the stack. 
+func (st StackTrace) Format(s fmt.State, verb rune) { + switch verb { + case 'v': + switch { + case s.Flag('+'): + for _, f := range st { + fmt.Fprintf(s, "\n%+v", f) + } + case s.Flag('#'): + fmt.Fprintf(s, "%#v", []Frame(st)) + default: + fmt.Fprintf(s, "%v", []Frame(st)) + } + case 's': + fmt.Fprintf(s, "%s", []Frame(st)) + } +} + +// stack represents a stack of program counters. +type stack []uintptr + +func (s *stack) Format(st fmt.State, verb rune) { + switch verb { + case 'v': + switch { + case st.Flag('+'): + for _, pc := range *s { + f := Frame(pc) + fmt.Fprintf(st, "\n%+v", f) + } + } + } +} + +func (s *stack) StackTrace() StackTrace { + f := make([]Frame, len(*s)) + for i := 0; i < len(f); i++ { + f[i] = Frame((*s)[i]) + } + return f +} + +func callers() *stack { + const depth = 32 + var pcs [depth]uintptr + n := runtime.Callers(3, pcs[:]) + var st stack = pcs[0:n] + return &st +} + +// funcname removes the path prefix component of a function's name reported by func.Name(). +func funcname(name string) string { + i := strings.LastIndex(name, "/") + name = name[i+1:] + i = strings.Index(name, ".") + return name[i+1:] +} diff --git a/vendor/github.com/siddontang/go-snappy/AUTHORS b/vendor/github.com/siddontang/go-snappy/AUTHORS new file mode 100644 index 0000000000..8ddb5b7a2b --- /dev/null +++ b/vendor/github.com/siddontang/go-snappy/AUTHORS @@ -0,0 +1,12 @@ +# This is the official list of Snappy-Go authors for copyright purposes. +# This file is distinct from the CONTRIBUTORS files. +# See the latter for an explanation. + +# Names should be added to this file as +# Name or Organization <email address> +# The email address is not required for organizations. + +# Please keep the list sorted. + +Google Inc. 
+Jan Mercl <0xjnml@gmail.com> diff --git a/vendor/github.com/siddontang/go-snappy/CONTRIBUTORS b/vendor/github.com/siddontang/go-snappy/CONTRIBUTORS new file mode 100644 index 0000000000..50b69c80ea --- /dev/null +++ b/vendor/github.com/siddontang/go-snappy/CONTRIBUTORS @@ -0,0 +1,34 @@ +# This is the official list of people who can contribute +# (and typically have contributed) code to the Snappy-Go repository. +# The AUTHORS file lists the copyright holders; this file +# lists people. For example, Google employees are listed here +# but not in AUTHORS, because Google holds the copyright. +# +# The submission process automatically checks to make sure +# that people submitting code are listed in this file (by email address). +# +# Names should be added to this file only after verifying that +# the individual or the individual's organization has agreed to +# the appropriate Contributor License Agreement, found here: +# +# http://code.google.com/legal/individual-cla-v1.0.html +# http://code.google.com/legal/corporate-cla-v1.0.html +# +# The agreement for individuals can be filled out on the web. +# +# When adding J Random Contributor's name to this file, +# either J's name or J's organization's name should be +# added to the AUTHORS file, depending on whether the +# individual or corporate CLA was used. + +# Names should be added to this file like so: +# Name <email address> + +# Please keep the list sorted. + +Jan Mercl <0xjnml@gmail.com> +Kai Backman <kaib@golang.org> +Marc-Antoine Ruel <maruel@chromium.org> +Nigel Tao <nigeltao@golang.org> +Rob Pike <r@golang.org> +Russ Cox <rsc@golang.org> diff --git a/vendor/github.com/siddontang/go-snappy/LICENSE b/vendor/github.com/siddontang/go-snappy/LICENSE new file mode 100644 index 0000000000..6050c10f4c --- /dev/null +++ b/vendor/github.com/siddontang/go-snappy/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2011 The Snappy-Go Authors. All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/github.com/siddontang/go-snappy/snappy/decode.go b/vendor/github.com/siddontang/go-snappy/snappy/decode.go new file mode 100644 index 0000000000..d93c1b9dbf --- /dev/null +++ b/vendor/github.com/siddontang/go-snappy/snappy/decode.go @@ -0,0 +1,124 @@ +// Copyright 2011 The Snappy-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package snappy + +import ( + "encoding/binary" + "errors" +) + +// ErrCorrupt reports that the input is invalid. 
+var ErrCorrupt = errors.New("snappy: corrupt input") + +// DecodedLen returns the length of the decoded block. +func DecodedLen(src []byte) (int, error) { + v, _, err := decodedLen(src) + return v, err +} + +// decodedLen returns the length of the decoded block and the number of bytes +// that the length header occupied. +func decodedLen(src []byte) (blockLen, headerLen int, err error) { + v, n := binary.Uvarint(src) + if n == 0 { + return 0, 0, ErrCorrupt + } + if uint64(int(v)) != v { + return 0, 0, errors.New("snappy: decoded block is too large") + } + return int(v), n, nil +} + +// Decode returns the decoded form of src. The returned slice may be a sub- +// slice of dst if dst was large enough to hold the entire decoded block. +// Otherwise, a newly allocated slice will be returned. +// It is valid to pass a nil dst. +func Decode(dst, src []byte) ([]byte, error) { + dLen, s, err := decodedLen(src) + if err != nil { + return nil, err + } + if len(dst) < dLen { + dst = make([]byte, dLen) + } + + var d, offset, length int + for s < len(src) { + switch src[s] & 0x03 { + case tagLiteral: + x := uint(src[s] >> 2) + switch { + case x < 60: + s += 1 + case x == 60: + s += 2 + if s > len(src) { + return nil, ErrCorrupt + } + x = uint(src[s-1]) + case x == 61: + s += 3 + if s > len(src) { + return nil, ErrCorrupt + } + x = uint(src[s-2]) | uint(src[s-1])<<8 + case x == 62: + s += 4 + if s > len(src) { + return nil, ErrCorrupt + } + x = uint(src[s-3]) | uint(src[s-2])<<8 | uint(src[s-1])<<16 + case x == 63: + s += 5 + if s > len(src) { + return nil, ErrCorrupt + } + x = uint(src[s-4]) | uint(src[s-3])<<8 | uint(src[s-2])<<16 | uint(src[s-1])<<24 + } + length = int(x + 1) + if length <= 0 { + return nil, errors.New("snappy: unsupported literal length") + } + if length > len(dst)-d || length > len(src)-s { + return nil, ErrCorrupt + } + copy(dst[d:], src[s:s+length]) + d += length + s += length + continue + + case tagCopy1: + s += 2 + if s > len(src) { + return nil, 
ErrCorrupt + } + length = 4 + int(src[s-2])>>2&0x7 + offset = int(src[s-2])&0xe0<<3 | int(src[s-1]) + + case tagCopy2: + s += 3 + if s > len(src) { + return nil, ErrCorrupt + } + length = 1 + int(src[s-3])>>2 + offset = int(src[s-2]) | int(src[s-1])<<8 + + case tagCopy4: + return nil, errors.New("snappy: unsupported COPY_4 tag") + } + + end := d + length + if offset > d || end > len(dst) { + return nil, ErrCorrupt + } + for ; d < end; d++ { + dst[d] = dst[d-offset] + } + } + if d != dLen { + return nil, ErrCorrupt + } + return dst[:d], nil +} diff --git a/vendor/github.com/siddontang/go-snappy/snappy/encode.go b/vendor/github.com/siddontang/go-snappy/snappy/encode.go new file mode 100644 index 0000000000..b2371db11c --- /dev/null +++ b/vendor/github.com/siddontang/go-snappy/snappy/encode.go @@ -0,0 +1,174 @@ +// Copyright 2011 The Snappy-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package snappy + +import ( + "encoding/binary" +) + +// We limit how far copy back-references can go, the same as the C++ code. +const maxOffset = 1 << 15 + +// emitLiteral writes a literal chunk and returns the number of bytes written. 
+func emitLiteral(dst, lit []byte) int { + i, n := 0, uint(len(lit)-1) + switch { + case n < 60: + dst[0] = uint8(n)<<2 | tagLiteral + i = 1 + case n < 1<<8: + dst[0] = 60<<2 | tagLiteral + dst[1] = uint8(n) + i = 2 + case n < 1<<16: + dst[0] = 61<<2 | tagLiteral + dst[1] = uint8(n) + dst[2] = uint8(n >> 8) + i = 3 + case n < 1<<24: + dst[0] = 62<<2 | tagLiteral + dst[1] = uint8(n) + dst[2] = uint8(n >> 8) + dst[3] = uint8(n >> 16) + i = 4 + case int64(n) < 1<<32: + dst[0] = 63<<2 | tagLiteral + dst[1] = uint8(n) + dst[2] = uint8(n >> 8) + dst[3] = uint8(n >> 16) + dst[4] = uint8(n >> 24) + i = 5 + default: + panic("snappy: source buffer is too long") + } + if copy(dst[i:], lit) != len(lit) { + panic("snappy: destination buffer is too short") + } + return i + len(lit) +} + +// emitCopy writes a copy chunk and returns the number of bytes written. +func emitCopy(dst []byte, offset, length int) int { + i := 0 + for length > 0 { + x := length - 4 + if 0 <= x && x < 1<<3 && offset < 1<<11 { + dst[i+0] = uint8(offset>>8)&0x07<<5 | uint8(x)<<2 | tagCopy1 + dst[i+1] = uint8(offset) + i += 2 + break + } + + x = length + if x > 1<<6 { + x = 1 << 6 + } + dst[i+0] = uint8(x-1)<<2 | tagCopy2 + dst[i+1] = uint8(offset) + dst[i+2] = uint8(offset >> 8) + i += 3 + length -= x + } + return i +} + +// Encode returns the encoded form of src. The returned slice may be a sub- +// slice of dst if dst was large enough to hold the entire encoded block. +// Otherwise, a newly allocated slice will be returned. +// It is valid to pass a nil dst. +func Encode(dst, src []byte) ([]byte, error) { + if n := MaxEncodedLen(len(src)); len(dst) < n { + dst = make([]byte, n) + } + + // The block starts with the varint-encoded length of the decompressed bytes. + d := binary.PutUvarint(dst, uint64(len(src))) + + // Return early if src is short. + if len(src) <= 4 { + if len(src) != 0 { + d += emitLiteral(dst[d:], src) + } + return dst[:d], nil + } + + // Initialize the hash table. 
Its size ranges from 1<<8 to 1<<14 inclusive. + const maxTableSize = 1 << 14 + shift, tableSize := uint(32-8), 1<<8 + for tableSize < maxTableSize && tableSize < len(src) { + shift-- + tableSize *= 2 + } + var table [maxTableSize]int + + // Iterate over the source bytes. + var ( + s int // The iterator position. + t int // The last position with the same hash as s. + lit int // The start position of any pending literal bytes. + ) + for s+3 < len(src) { + // Update the hash table. + b0, b1, b2, b3 := src[s], src[s+1], src[s+2], src[s+3] + h := uint32(b0) | uint32(b1)<<8 | uint32(b2)<<16 | uint32(b3)<<24 + p := &table[(h*0x1e35a7bd)>>shift] + // We need to to store values in [-1, inf) in table. To save + // some initialization time, (re)use the table's zero value + // and shift the values against this zero: add 1 on writes, + // subtract 1 on reads. + t, *p = *p-1, s+1 + // If t is invalid or src[s:s+4] differs from src[t:t+4], accumulate a literal byte. + if t < 0 || s-t >= maxOffset || b0 != src[t] || b1 != src[t+1] || b2 != src[t+2] || b3 != src[t+3] { + s++ + continue + } + // Otherwise, we have a match. First, emit any pending literal bytes. + if lit != s { + d += emitLiteral(dst[d:], src[lit:s]) + } + // Extend the match to be as long as possible. + s0 := s + s, t = s+4, t+4 + for s < len(src) && src[s] == src[t] { + s++ + t++ + } + // Emit the copied bytes. + d += emitCopy(dst[d:], s-t, s-s0) + lit = s + } + + // Emit any final pending literal bytes and return. + if lit != len(src) { + d += emitLiteral(dst[d:], src[lit:]) + } + return dst[:d], nil +} + +// MaxEncodedLen returns the maximum length of a snappy block, given its +// uncompressed length. +func MaxEncodedLen(srcLen int) int { + // Compressed data can be defined as: + // compressed := item* literal* + // item := literal* copy + // + // The trailing literal sequence has a space blowup of at most 62/60 + // since a literal of length 60 needs one tag byte + one extra byte + // for length information. 
+ // + // Item blowup is trickier to measure. Suppose the "copy" op copies + // 4 bytes of data. Because of a special check in the encoding code, + // we produce a 4-byte copy only if the offset is < 65536. Therefore + // the copy op takes 3 bytes to encode, and this type of item leads + // to at most the 62/60 blowup for representing literals. + // + // Suppose the "copy" op copies 5 bytes of data. If the offset is big + // enough, it will take 5 bytes to encode the copy op. Therefore the + // worst case here is a one-byte literal followed by a five-byte copy. + // That is, 6 bytes of input turn into 7 bytes of "compressed" data. + // + // This last factor dominates the blowup, so the final estimate is: + return 32 + srcLen + srcLen/6 +} diff --git a/vendor/github.com/siddontang/go-snappy/snappy/snappy.go b/vendor/github.com/siddontang/go-snappy/snappy/snappy.go new file mode 100644 index 0000000000..2f1b790d0b --- /dev/null +++ b/vendor/github.com/siddontang/go-snappy/snappy/snappy.go @@ -0,0 +1,38 @@ +// Copyright 2011 The Snappy-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package snappy implements the snappy block-based compression format. +// It aims for very high speeds and reasonable compression. +// +// The C++ snappy implementation is at http://code.google.com/p/snappy/ +package snappy + +/* +Each encoded block begins with the varint-encoded length of the decoded data, +followed by a sequence of chunks. Chunks begin and end on byte boundaries. The +first byte of each chunk is broken into its 2 least and 6 most significant bits +called l and m: l ranges in [0, 4) and m ranges in [0, 64). l is the chunk tag. +Zero means a literal tag. All other values mean a copy tag. + +For literal tags: + - If m < 60, the next 1 + m bytes are literal bytes. + - Otherwise, let n be the little-endian unsigned integer denoted by the next + m - 59 bytes. 
The next 1 + n bytes after that are literal bytes. + +For copy tags, length bytes are copied from offset bytes ago, in the style of +Lempel-Ziv compression algorithms. In particular: + - For l == 1, the offset ranges in [0, 1<<11) and the length in [4, 12). + The length is 4 + the low 3 bits of m. The high 3 bits of m form bits 8-10 + of the offset. The next byte is bits 0-7 of the offset. + - For l == 2, the offset ranges in [0, 1<<16) and the length in [1, 65). + The length is 1 + m. The offset is the little-endian unsigned integer + denoted by the next 2 bytes. + - For l == 3, this tag is a legacy format that is no longer supported. +*/ +const ( + tagLiteral = 0x00 + tagCopy1 = 0x01 + tagCopy2 = 0x02 + tagCopy4 = 0x03 +) diff --git a/vendor/github.com/syndtr/goleveldb/LICENSE b/vendor/github.com/syndtr/goleveldb/LICENSE new file mode 100644 index 0000000000..4a772d1ab3 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/LICENSE @@ -0,0 +1,24 @@ +Copyright 2012 Suryandaru Triandana <syndtr@gmail.com> +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/batch.go b/vendor/github.com/syndtr/goleveldb/leveldb/batch.go new file mode 100644 index 0000000000..225920002d --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/batch.go @@ -0,0 +1,349 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "encoding/binary" + "fmt" + "io" + + "github.com/syndtr/goleveldb/leveldb/errors" + "github.com/syndtr/goleveldb/leveldb/memdb" + "github.com/syndtr/goleveldb/leveldb/storage" +) + +// ErrBatchCorrupted records reason of batch corruption. This error will be +// wrapped with errors.ErrCorrupted. +type ErrBatchCorrupted struct { + Reason string +} + +func (e *ErrBatchCorrupted) Error() string { + return fmt.Sprintf("leveldb: batch corrupted: %s", e.Reason) +} + +func newErrBatchCorrupted(reason string) error { + return errors.NewErrCorrupted(storage.FileDesc{}, &ErrBatchCorrupted{reason}) +} + +const ( + batchHeaderLen = 8 + 4 + batchGrowRec = 3000 + batchBufioSize = 16 +) + +// BatchReplay wraps basic batch operations. 
+type BatchReplay interface { + Put(key, value []byte) + Delete(key []byte) +} + +type batchIndex struct { + keyType keyType + keyPos, keyLen int + valuePos, valueLen int +} + +func (index batchIndex) k(data []byte) []byte { + return data[index.keyPos : index.keyPos+index.keyLen] +} + +func (index batchIndex) v(data []byte) []byte { + if index.valueLen != 0 { + return data[index.valuePos : index.valuePos+index.valueLen] + } + return nil +} + +func (index batchIndex) kv(data []byte) (key, value []byte) { + return index.k(data), index.v(data) +} + +// Batch is a write batch. +type Batch struct { + data []byte + index []batchIndex + + // internalLen is sums of key/value pair length plus 8-bytes internal key. + internalLen int +} + +func (b *Batch) grow(n int) { + o := len(b.data) + if cap(b.data)-o < n { + div := 1 + if len(b.index) > batchGrowRec { + div = len(b.index) / batchGrowRec + } + ndata := make([]byte, o, o+n+o/div) + copy(ndata, b.data) + b.data = ndata + } +} + +func (b *Batch) appendRec(kt keyType, key, value []byte) { + n := 1 + binary.MaxVarintLen32 + len(key) + if kt == keyTypeVal { + n += binary.MaxVarintLen32 + len(value) + } + b.grow(n) + index := batchIndex{keyType: kt} + o := len(b.data) + data := b.data[:o+n] + data[o] = byte(kt) + o++ + o += binary.PutUvarint(data[o:], uint64(len(key))) + index.keyPos = o + index.keyLen = len(key) + o += copy(data[o:], key) + if kt == keyTypeVal { + o += binary.PutUvarint(data[o:], uint64(len(value))) + index.valuePos = o + index.valueLen = len(value) + o += copy(data[o:], value) + } + b.data = data[:o] + b.index = append(b.index, index) + b.internalLen += index.keyLen + index.valueLen + 8 +} + +// Put appends 'put operation' of the given key/value pair to the batch. +// It is safe to modify the contents of the argument after Put returns but not +// before. +func (b *Batch) Put(key, value []byte) { + b.appendRec(keyTypeVal, key, value) +} + +// Delete appends 'delete operation' of the given key to the batch. 
+// It is safe to modify the contents of the argument after Delete returns but +// not before. +func (b *Batch) Delete(key []byte) { + b.appendRec(keyTypeDel, key, nil) +} + +// Dump dumps batch contents. The returned slice can be loaded into the +// batch using Load method. +// The returned slice is not its own copy, so the contents should not be +// modified. +func (b *Batch) Dump() []byte { + return b.data +} + +// Load loads given slice into the batch. Previous contents of the batch +// will be discarded. +// The given slice will not be copied and will be used as batch buffer, so +// it is not safe to modify the contents of the slice. +func (b *Batch) Load(data []byte) error { + return b.decode(data, -1) +} + +// Replay replays batch contents. +func (b *Batch) Replay(r BatchReplay) error { + for _, index := range b.index { + switch index.keyType { + case keyTypeVal: + r.Put(index.k(b.data), index.v(b.data)) + case keyTypeDel: + r.Delete(index.k(b.data)) + } + } + return nil +} + +// Len returns number of records in the batch. +func (b *Batch) Len() int { + return len(b.index) +} + +// Reset resets the batch. +func (b *Batch) Reset() { + b.data = b.data[:0] + b.index = b.index[:0] + b.internalLen = 0 +} + +func (b *Batch) replayInternal(fn func(i int, kt keyType, k, v []byte) error) error { + for i, index := range b.index { + if err := fn(i, index.keyType, index.k(b.data), index.v(b.data)); err != nil { + return err + } + } + return nil +} + +func (b *Batch) append(p *Batch) { + ob := len(b.data) + oi := len(b.index) + b.data = append(b.data, p.data...) + b.index = append(b.index, p.index...) + b.internalLen += p.internalLen + + // Updating index offset. 
+ if ob != 0 { + for ; oi < len(b.index); oi++ { + index := &b.index[oi] + index.keyPos += ob + if index.valueLen != 0 { + index.valuePos += ob + } + } + } +} + +func (b *Batch) decode(data []byte, expectedLen int) error { + b.data = data + b.index = b.index[:0] + b.internalLen = 0 + err := decodeBatch(data, func(i int, index batchIndex) error { + b.index = append(b.index, index) + b.internalLen += index.keyLen + index.valueLen + 8 + return nil + }) + if err != nil { + return err + } + if expectedLen >= 0 && len(b.index) != expectedLen { + return newErrBatchCorrupted(fmt.Sprintf("invalid records length: %d vs %d", expectedLen, len(b.index))) + } + return nil +} + +func (b *Batch) putMem(seq uint64, mdb *memdb.DB) error { + var ik []byte + for i, index := range b.index { + ik = makeInternalKey(ik, index.k(b.data), seq+uint64(i), index.keyType) + if err := mdb.Put(ik, index.v(b.data)); err != nil { + return err + } + } + return nil +} + +func (b *Batch) revertMem(seq uint64, mdb *memdb.DB) error { + var ik []byte + for i, index := range b.index { + ik = makeInternalKey(ik, index.k(b.data), seq+uint64(i), index.keyType) + if err := mdb.Delete(ik); err != nil { + return err + } + } + return nil +} + +func newBatch() interface{} { + return &Batch{} +} + +func decodeBatch(data []byte, fn func(i int, index batchIndex) error) error { + var index batchIndex + for i, o := 0, 0; o < len(data); i++ { + // Key type. + index.keyType = keyType(data[o]) + if index.keyType > keyTypeVal { + return newErrBatchCorrupted(fmt.Sprintf("bad record: invalid type %#x", uint(index.keyType))) + } + o++ + + // Key. + x, n := binary.Uvarint(data[o:]) + o += n + if n <= 0 || o+int(x) > len(data) { + return newErrBatchCorrupted("bad record: invalid key length") + } + index.keyPos = o + index.keyLen = int(x) + o += index.keyLen + + // Value. 
+ if index.keyType == keyTypeVal { + x, n = binary.Uvarint(data[o:]) + o += n + if n <= 0 || o+int(x) > len(data) { + return newErrBatchCorrupted("bad record: invalid value length") + } + index.valuePos = o + index.valueLen = int(x) + o += index.valueLen + } else { + index.valuePos = 0 + index.valueLen = 0 + } + + if err := fn(i, index); err != nil { + return err + } + } + return nil +} + +func decodeBatchToMem(data []byte, expectSeq uint64, mdb *memdb.DB) (seq uint64, batchLen int, err error) { + seq, batchLen, err = decodeBatchHeader(data) + if err != nil { + return 0, 0, err + } + if seq < expectSeq { + return 0, 0, newErrBatchCorrupted("invalid sequence number") + } + data = data[batchHeaderLen:] + var ik []byte + var decodedLen int + err = decodeBatch(data, func(i int, index batchIndex) error { + if i >= batchLen { + return newErrBatchCorrupted("invalid records length") + } + ik = makeInternalKey(ik, index.k(data), seq+uint64(i), index.keyType) + if err := mdb.Put(ik, index.v(data)); err != nil { + return err + } + decodedLen++ + return nil + }) + if err == nil && decodedLen != batchLen { + err = newErrBatchCorrupted(fmt.Sprintf("invalid records length: %d vs %d", batchLen, decodedLen)) + } + return +} + +func encodeBatchHeader(dst []byte, seq uint64, batchLen int) []byte { + dst = ensureBuffer(dst, batchHeaderLen) + binary.LittleEndian.PutUint64(dst, seq) + binary.LittleEndian.PutUint32(dst[8:], uint32(batchLen)) + return dst +} + +func decodeBatchHeader(data []byte) (seq uint64, batchLen int, err error) { + if len(data) < batchHeaderLen { + return 0, 0, newErrBatchCorrupted("too short") + } + + seq = binary.LittleEndian.Uint64(data) + batchLen = int(binary.LittleEndian.Uint32(data[8:])) + if batchLen < 0 { + return 0, 0, newErrBatchCorrupted("invalid records length") + } + return +} + +func batchesLen(batches []*Batch) int { + batchLen := 0 + for _, batch := range batches { + batchLen += batch.Len() + } + return batchLen +} + +func writeBatchesWithHeader(wr 
io.Writer, batches []*Batch, seq uint64) error { + if _, err := wr.Write(encodeBatchHeader(nil, seq, batchesLen(batches))); err != nil { + return err + } + for _, batch := range batches { + if _, err := wr.Write(batch.data); err != nil { + return err + } + } + return nil +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/cache/cache.go b/vendor/github.com/syndtr/goleveldb/leveldb/cache/cache.go new file mode 100644 index 0000000000..c36ad32359 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/cache/cache.go @@ -0,0 +1,704 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Package cache provides interface and implementation of a cache algorithms. +package cache + +import ( + "sync" + "sync/atomic" + "unsafe" + + "github.com/syndtr/goleveldb/leveldb/util" +) + +// Cacher provides interface to implements a caching functionality. +// An implementation must be safe for concurrent use. +type Cacher interface { + // Capacity returns cache capacity. + Capacity() int + + // SetCapacity sets cache capacity. + SetCapacity(capacity int) + + // Promote promotes the 'cache node'. + Promote(n *Node) + + // Ban evicts the 'cache node' and prevent subsequent 'promote'. + Ban(n *Node) + + // Evict evicts the 'cache node'. + Evict(n *Node) + + // EvictNS evicts 'cache node' with the given namespace. + EvictNS(ns uint64) + + // EvictAll evicts all 'cache node'. + EvictAll() + + // Close closes the 'cache tree' + Close() error +} + +// Value is a 'cacheable object'. It may implements util.Releaser, if +// so the the Release method will be called once object is released. +type Value interface{} + +// NamespaceGetter provides convenient wrapper for namespace. +type NamespaceGetter struct { + Cache *Cache + NS uint64 +} + +// Get simply calls Cache.Get() method. 
+func (g *NamespaceGetter) Get(key uint64, setFunc func() (size int, value Value)) *Handle { + return g.Cache.Get(g.NS, key, setFunc) +} + +// The hash tables implementation is based on: +// "Dynamic-Sized Nonblocking Hash Tables", by Yujie Liu, +// Kunlong Zhang, and Michael Spear. +// ACM Symposium on Principles of Distributed Computing, Jul 2014. + +const ( + mInitialSize = 1 << 4 + mOverflowThreshold = 1 << 5 + mOverflowGrowThreshold = 1 << 7 +) + +type mBucket struct { + mu sync.Mutex + node []*Node + frozen bool +} + +func (b *mBucket) freeze() []*Node { + b.mu.Lock() + defer b.mu.Unlock() + if !b.frozen { + b.frozen = true + } + return b.node +} + +func (b *mBucket) get(r *Cache, h *mNode, hash uint32, ns, key uint64, noset bool) (done, added bool, n *Node) { + b.mu.Lock() + + if b.frozen { + b.mu.Unlock() + return + } + + // Scan the node. + for _, n := range b.node { + if n.hash == hash && n.ns == ns && n.key == key { + atomic.AddInt32(&n.ref, 1) + b.mu.Unlock() + return true, false, n + } + } + + // Get only. + if noset { + b.mu.Unlock() + return true, false, nil + } + + // Create node. + n = &Node{ + r: r, + hash: hash, + ns: ns, + key: key, + ref: 1, + } + // Add node to bucket. + b.node = append(b.node, n) + bLen := len(b.node) + b.mu.Unlock() + + // Update counter. + grow := atomic.AddInt32(&r.nodes, 1) >= h.growThreshold + if bLen > mOverflowThreshold { + grow = grow || atomic.AddInt32(&h.overflow, 1) >= mOverflowGrowThreshold + } + + // Grow. 
+ if grow && atomic.CompareAndSwapInt32(&h.resizeInProgess, 0, 1) { + nhLen := len(h.buckets) << 1 + nh := &mNode{ + buckets: make([]unsafe.Pointer, nhLen), + mask: uint32(nhLen) - 1, + pred: unsafe.Pointer(h), + growThreshold: int32(nhLen * mOverflowThreshold), + shrinkThreshold: int32(nhLen >> 1), + } + ok := atomic.CompareAndSwapPointer(&r.mHead, unsafe.Pointer(h), unsafe.Pointer(nh)) + if !ok { + panic("BUG: failed swapping head") + } + go nh.initBuckets() + } + + return true, true, n +} + +func (b *mBucket) delete(r *Cache, h *mNode, hash uint32, ns, key uint64) (done, deleted bool) { + b.mu.Lock() + + if b.frozen { + b.mu.Unlock() + return + } + + // Scan the node. + var ( + n *Node + bLen int + ) + for i := range b.node { + n = b.node[i] + if n.ns == ns && n.key == key { + if atomic.LoadInt32(&n.ref) == 0 { + deleted = true + + // Call releaser. + if n.value != nil { + if r, ok := n.value.(util.Releaser); ok { + r.Release() + } + n.value = nil + } + + // Remove node from bucket. + b.node = append(b.node[:i], b.node[i+1:]...) + bLen = len(b.node) + } + break + } + } + b.mu.Unlock() + + if deleted { + // Call OnDel. + for _, f := range n.onDel { + f() + } + + // Update counter. + atomic.AddInt32(&r.size, int32(n.size)*-1) + shrink := atomic.AddInt32(&r.nodes, -1) < h.shrinkThreshold + if bLen >= mOverflowThreshold { + atomic.AddInt32(&h.overflow, -1) + } + + // Shrink. 
+ if shrink && len(h.buckets) > mInitialSize && atomic.CompareAndSwapInt32(&h.resizeInProgess, 0, 1) { + nhLen := len(h.buckets) >> 1 + nh := &mNode{ + buckets: make([]unsafe.Pointer, nhLen), + mask: uint32(nhLen) - 1, + pred: unsafe.Pointer(h), + growThreshold: int32(nhLen * mOverflowThreshold), + shrinkThreshold: int32(nhLen >> 1), + } + ok := atomic.CompareAndSwapPointer(&r.mHead, unsafe.Pointer(h), unsafe.Pointer(nh)) + if !ok { + panic("BUG: failed swapping head") + } + go nh.initBuckets() + } + } + + return true, deleted +} + +type mNode struct { + buckets []unsafe.Pointer // []*mBucket + mask uint32 + pred unsafe.Pointer // *mNode + resizeInProgess int32 + + overflow int32 + growThreshold int32 + shrinkThreshold int32 +} + +func (n *mNode) initBucket(i uint32) *mBucket { + if b := (*mBucket)(atomic.LoadPointer(&n.buckets[i])); b != nil { + return b + } + + p := (*mNode)(atomic.LoadPointer(&n.pred)) + if p != nil { + var node []*Node + if n.mask > p.mask { + // Grow. + pb := (*mBucket)(atomic.LoadPointer(&p.buckets[i&p.mask])) + if pb == nil { + pb = p.initBucket(i & p.mask) + } + m := pb.freeze() + // Split nodes. + for _, x := range m { + if x.hash&n.mask == i { + node = append(node, x) + } + } + } else { + // Shrink. + pb0 := (*mBucket)(atomic.LoadPointer(&p.buckets[i])) + if pb0 == nil { + pb0 = p.initBucket(i) + } + pb1 := (*mBucket)(atomic.LoadPointer(&p.buckets[i+uint32(len(n.buckets))])) + if pb1 == nil { + pb1 = p.initBucket(i + uint32(len(n.buckets))) + } + m0 := pb0.freeze() + m1 := pb1.freeze() + // Merge nodes. + node = make([]*Node, 0, len(m0)+len(m1)) + node = append(node, m0...) + node = append(node, m1...) 
+ } + b := &mBucket{node: node} + if atomic.CompareAndSwapPointer(&n.buckets[i], nil, unsafe.Pointer(b)) { + if len(node) > mOverflowThreshold { + atomic.AddInt32(&n.overflow, int32(len(node)-mOverflowThreshold)) + } + return b + } + } + + return (*mBucket)(atomic.LoadPointer(&n.buckets[i])) +} + +func (n *mNode) initBuckets() { + for i := range n.buckets { + n.initBucket(uint32(i)) + } + atomic.StorePointer(&n.pred, nil) +} + +// Cache is a 'cache map'. +type Cache struct { + mu sync.RWMutex + mHead unsafe.Pointer // *mNode + nodes int32 + size int32 + cacher Cacher + closed bool +} + +// NewCache creates a new 'cache map'. The cacher is optional and +// may be nil. +func NewCache(cacher Cacher) *Cache { + h := &mNode{ + buckets: make([]unsafe.Pointer, mInitialSize), + mask: mInitialSize - 1, + growThreshold: int32(mInitialSize * mOverflowThreshold), + shrinkThreshold: 0, + } + for i := range h.buckets { + h.buckets[i] = unsafe.Pointer(&mBucket{}) + } + r := &Cache{ + mHead: unsafe.Pointer(h), + cacher: cacher, + } + return r +} + +func (r *Cache) getBucket(hash uint32) (*mNode, *mBucket) { + h := (*mNode)(atomic.LoadPointer(&r.mHead)) + i := hash & h.mask + b := (*mBucket)(atomic.LoadPointer(&h.buckets[i])) + if b == nil { + b = h.initBucket(i) + } + return h, b +} + +func (r *Cache) delete(n *Node) bool { + for { + h, b := r.getBucket(n.hash) + done, deleted := b.delete(r, h, n.hash, n.ns, n.key) + if done { + return deleted + } + } +} + +// Nodes returns number of 'cache node' in the map. +func (r *Cache) Nodes() int { + return int(atomic.LoadInt32(&r.nodes)) +} + +// Size returns sums of 'cache node' size in the map. +func (r *Cache) Size() int { + return int(atomic.LoadInt32(&r.size)) +} + +// Capacity returns cache capacity. +func (r *Cache) Capacity() int { + if r.cacher == nil { + return 0 + } + return r.cacher.Capacity() +} + +// SetCapacity sets cache capacity. 
+func (r *Cache) SetCapacity(capacity int) { + if r.cacher != nil { + r.cacher.SetCapacity(capacity) + } +} + +// Get gets 'cache node' with the given namespace and key. +// If cache node is not found and setFunc is not nil, Get will atomically creates +// the 'cache node' by calling setFunc. Otherwise Get will returns nil. +// +// The returned 'cache handle' should be released after use by calling Release +// method. +func (r *Cache) Get(ns, key uint64, setFunc func() (size int, value Value)) *Handle { + r.mu.RLock() + defer r.mu.RUnlock() + if r.closed { + return nil + } + + hash := murmur32(ns, key, 0xf00) + for { + h, b := r.getBucket(hash) + done, _, n := b.get(r, h, hash, ns, key, setFunc == nil) + if done { + if n != nil { + n.mu.Lock() + if n.value == nil { + if setFunc == nil { + n.mu.Unlock() + n.unref() + return nil + } + + n.size, n.value = setFunc() + if n.value == nil { + n.size = 0 + n.mu.Unlock() + n.unref() + return nil + } + atomic.AddInt32(&r.size, int32(n.size)) + } + n.mu.Unlock() + if r.cacher != nil { + r.cacher.Promote(n) + } + return &Handle{unsafe.Pointer(n)} + } + + break + } + } + return nil +} + +// Delete removes and ban 'cache node' with the given namespace and key. +// A banned 'cache node' will never inserted into the 'cache tree'. Ban +// only attributed to the particular 'cache node', so when a 'cache node' +// is recreated it will not be banned. +// +// If onDel is not nil, then it will be executed if such 'cache node' +// doesn't exist or once the 'cache node' is released. +// +// Delete return true is such 'cache node' exist. 
+func (r *Cache) Delete(ns, key uint64, onDel func()) bool { + r.mu.RLock() + defer r.mu.RUnlock() + if r.closed { + return false + } + + hash := murmur32(ns, key, 0xf00) + for { + h, b := r.getBucket(hash) + done, _, n := b.get(r, h, hash, ns, key, true) + if done { + if n != nil { + if onDel != nil { + n.mu.Lock() + n.onDel = append(n.onDel, onDel) + n.mu.Unlock() + } + if r.cacher != nil { + r.cacher.Ban(n) + } + n.unref() + return true + } + + break + } + } + + if onDel != nil { + onDel() + } + + return false +} + +// Evict evicts 'cache node' with the given namespace and key. This will +// simply call Cacher.Evict. +// +// Evict return true is such 'cache node' exist. +func (r *Cache) Evict(ns, key uint64) bool { + r.mu.RLock() + defer r.mu.RUnlock() + if r.closed { + return false + } + + hash := murmur32(ns, key, 0xf00) + for { + h, b := r.getBucket(hash) + done, _, n := b.get(r, h, hash, ns, key, true) + if done { + if n != nil { + if r.cacher != nil { + r.cacher.Evict(n) + } + n.unref() + return true + } + + break + } + } + + return false +} + +// EvictNS evicts 'cache node' with the given namespace. This will +// simply call Cacher.EvictNS. +func (r *Cache) EvictNS(ns uint64) { + r.mu.RLock() + defer r.mu.RUnlock() + if r.closed { + return + } + + if r.cacher != nil { + r.cacher.EvictNS(ns) + } +} + +// EvictAll evicts all 'cache node'. This will simply call Cacher.EvictAll. +func (r *Cache) EvictAll() { + r.mu.RLock() + defer r.mu.RUnlock() + if r.closed { + return + } + + if r.cacher != nil { + r.cacher.EvictAll() + } +} + +// Close closes the 'cache map' and forcefully releases all 'cache node'. +func (r *Cache) Close() error { + r.mu.Lock() + if !r.closed { + r.closed = true + + h := (*mNode)(r.mHead) + h.initBuckets() + + for i := range h.buckets { + b := (*mBucket)(h.buckets[i]) + for _, n := range b.node { + // Call releaser. + if n.value != nil { + if r, ok := n.value.(util.Releaser); ok { + r.Release() + } + n.value = nil + } + + // Call OnDel. 
+ for _, f := range n.onDel { + f() + } + n.onDel = nil + } + } + } + r.mu.Unlock() + + // Avoid deadlock. + if r.cacher != nil { + if err := r.cacher.Close(); err != nil { + return err + } + } + return nil +} + +// CloseWeak closes the 'cache map' and evict all 'cache node' from cacher, but +// unlike Close it doesn't forcefully releases 'cache node'. +func (r *Cache) CloseWeak() error { + r.mu.Lock() + if !r.closed { + r.closed = true + } + r.mu.Unlock() + + // Avoid deadlock. + if r.cacher != nil { + r.cacher.EvictAll() + if err := r.cacher.Close(); err != nil { + return err + } + } + return nil +} + +// Node is a 'cache node'. +type Node struct { + r *Cache + + hash uint32 + ns, key uint64 + + mu sync.Mutex + size int + value Value + + ref int32 + onDel []func() + + CacheData unsafe.Pointer +} + +// NS returns this 'cache node' namespace. +func (n *Node) NS() uint64 { + return n.ns +} + +// Key returns this 'cache node' key. +func (n *Node) Key() uint64 { + return n.key +} + +// Size returns this 'cache node' size. +func (n *Node) Size() int { + return n.size +} + +// Value returns this 'cache node' value. +func (n *Node) Value() Value { + return n.value +} + +// Ref returns this 'cache node' ref counter. +func (n *Node) Ref() int32 { + return atomic.LoadInt32(&n.ref) +} + +// GetHandle returns an handle for this 'cache node'. +func (n *Node) GetHandle() *Handle { + if atomic.AddInt32(&n.ref, 1) <= 1 { + panic("BUG: Node.GetHandle on zero ref") + } + return &Handle{unsafe.Pointer(n)} +} + +func (n *Node) unref() { + if atomic.AddInt32(&n.ref, -1) == 0 { + n.r.delete(n) + } +} + +func (n *Node) unrefLocked() { + if atomic.AddInt32(&n.ref, -1) == 0 { + n.r.mu.RLock() + if !n.r.closed { + n.r.delete(n) + } + n.r.mu.RUnlock() + } +} + +// Handle is a 'cache handle' of a 'cache node'. +type Handle struct { + n unsafe.Pointer // *Node +} + +// Value returns the value of the 'cache node'. 
+func (h *Handle) Value() Value { + n := (*Node)(atomic.LoadPointer(&h.n)) + if n != nil { + return n.value + } + return nil +} + +// Release releases this 'cache handle'. +// It is safe to call release multiple times. +func (h *Handle) Release() { + nPtr := atomic.LoadPointer(&h.n) + if nPtr != nil && atomic.CompareAndSwapPointer(&h.n, nPtr, nil) { + n := (*Node)(nPtr) + n.unrefLocked() + } +} + +func murmur32(ns, key uint64, seed uint32) uint32 { + const ( + m = uint32(0x5bd1e995) + r = 24 + ) + + k1 := uint32(ns >> 32) + k2 := uint32(ns) + k3 := uint32(key >> 32) + k4 := uint32(key) + + k1 *= m + k1 ^= k1 >> r + k1 *= m + + k2 *= m + k2 ^= k2 >> r + k2 *= m + + k3 *= m + k3 ^= k3 >> r + k3 *= m + + k4 *= m + k4 ^= k4 >> r + k4 *= m + + h := seed + + h *= m + h ^= k1 + h *= m + h ^= k2 + h *= m + h ^= k3 + h *= m + h ^= k4 + + h ^= h >> 13 + h *= m + h ^= h >> 15 + + return h +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/cache/lru.go b/vendor/github.com/syndtr/goleveldb/leveldb/cache/lru.go new file mode 100644 index 0000000000..d9a84cde15 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/cache/lru.go @@ -0,0 +1,195 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +package cache + +import ( + "sync" + "unsafe" +) + +type lruNode struct { + n *Node + h *Handle + ban bool + + next, prev *lruNode +} + +func (n *lruNode) insert(at *lruNode) { + x := at.next + at.next = n + n.prev = at + n.next = x + x.prev = n +} + +func (n *lruNode) remove() { + if n.prev != nil { + n.prev.next = n.next + n.next.prev = n.prev + n.prev = nil + n.next = nil + } else { + panic("BUG: removing removed node") + } +} + +type lru struct { + mu sync.Mutex + capacity int + used int + recent lruNode +} + +func (r *lru) reset() { + r.recent.next = &r.recent + r.recent.prev = &r.recent + r.used = 0 +} + +func (r *lru) Capacity() int { + r.mu.Lock() + defer r.mu.Unlock() + return r.capacity +} + +func (r *lru) SetCapacity(capacity int) { + var evicted []*lruNode + + r.mu.Lock() + r.capacity = capacity + for r.used > r.capacity { + rn := r.recent.prev + if rn == nil { + panic("BUG: invalid LRU used or capacity counter") + } + rn.remove() + rn.n.CacheData = nil + r.used -= rn.n.Size() + evicted = append(evicted, rn) + } + r.mu.Unlock() + + for _, rn := range evicted { + rn.h.Release() + } +} + +func (r *lru) Promote(n *Node) { + var evicted []*lruNode + + r.mu.Lock() + if n.CacheData == nil { + if n.Size() <= r.capacity { + rn := &lruNode{n: n, h: n.GetHandle()} + rn.insert(&r.recent) + n.CacheData = unsafe.Pointer(rn) + r.used += n.Size() + + for r.used > r.capacity { + rn := r.recent.prev + if rn == nil { + panic("BUG: invalid LRU used or capacity counter") + } + rn.remove() + rn.n.CacheData = nil + r.used -= rn.n.Size() + evicted = append(evicted, rn) + } + } + } else { + rn := (*lruNode)(n.CacheData) + if !rn.ban { + rn.remove() + rn.insert(&r.recent) + } + } + r.mu.Unlock() + + for _, rn := range evicted { + rn.h.Release() + } +} + +func (r *lru) Ban(n *Node) { + r.mu.Lock() + if n.CacheData == nil { + n.CacheData = unsafe.Pointer(&lruNode{n: n, ban: true}) + } else { + rn := (*lruNode)(n.CacheData) + if !rn.ban { + rn.remove() + rn.ban = true + r.used 
-= rn.n.Size() + r.mu.Unlock() + + rn.h.Release() + rn.h = nil + return + } + } + r.mu.Unlock() +} + +func (r *lru) Evict(n *Node) { + r.mu.Lock() + rn := (*lruNode)(n.CacheData) + if rn == nil || rn.ban { + r.mu.Unlock() + return + } + n.CacheData = nil + r.mu.Unlock() + + rn.h.Release() +} + +func (r *lru) EvictNS(ns uint64) { + var evicted []*lruNode + + r.mu.Lock() + for e := r.recent.prev; e != &r.recent; { + rn := e + e = e.prev + if rn.n.NS() == ns { + rn.remove() + rn.n.CacheData = nil + r.used -= rn.n.Size() + evicted = append(evicted, rn) + } + } + r.mu.Unlock() + + for _, rn := range evicted { + rn.h.Release() + } +} + +func (r *lru) EvictAll() { + r.mu.Lock() + back := r.recent.prev + for rn := back; rn != &r.recent; rn = rn.prev { + rn.n.CacheData = nil + } + r.reset() + r.mu.Unlock() + + for rn := back; rn != &r.recent; rn = rn.prev { + rn.h.Release() + } +} + +func (r *lru) Close() error { + return nil +} + +// NewLRU create a new LRU-cache. +func NewLRU(capacity int) Cacher { + r := &lru{capacity: capacity} + r.reset() + return r +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/comparer.go b/vendor/github.com/syndtr/goleveldb/leveldb/comparer.go new file mode 100644 index 0000000000..448402b826 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/comparer.go @@ -0,0 +1,67 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +package leveldb + +import ( + "github.com/syndtr/goleveldb/leveldb/comparer" +) + +type iComparer struct { + ucmp comparer.Comparer +} + +func (icmp *iComparer) uName() string { + return icmp.ucmp.Name() +} + +func (icmp *iComparer) uCompare(a, b []byte) int { + return icmp.ucmp.Compare(a, b) +} + +func (icmp *iComparer) uSeparator(dst, a, b []byte) []byte { + return icmp.ucmp.Separator(dst, a, b) +} + +func (icmp *iComparer) uSuccessor(dst, b []byte) []byte { + return icmp.ucmp.Successor(dst, b) +} + +func (icmp *iComparer) Name() string { + return icmp.uName() +} + +func (icmp *iComparer) Compare(a, b []byte) int { + x := icmp.uCompare(internalKey(a).ukey(), internalKey(b).ukey()) + if x == 0 { + if m, n := internalKey(a).num(), internalKey(b).num(); m > n { + return -1 + } else if m < n { + return 1 + } + } + return x +} + +func (icmp *iComparer) Separator(dst, a, b []byte) []byte { + ua, ub := internalKey(a).ukey(), internalKey(b).ukey() + dst = icmp.uSeparator(dst, ua, ub) + if dst != nil && len(dst) < len(ua) && icmp.uCompare(ua, dst) < 0 { + // Append earliest possible number. + return append(dst, keyMaxNumBytes...) + } + return nil +} + +func (icmp *iComparer) Successor(dst, b []byte) []byte { + ub := internalKey(b).ukey() + dst = icmp.uSuccessor(dst, ub) + if dst != nil && len(dst) < len(ub) && icmp.uCompare(ub, dst) < 0 { + // Append earliest possible number. + return append(dst, keyMaxNumBytes...) + } + return nil +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/comparer/bytes_comparer.go b/vendor/github.com/syndtr/goleveldb/leveldb/comparer/bytes_comparer.go new file mode 100644 index 0000000000..abf9fb65c7 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/comparer/bytes_comparer.go @@ -0,0 +1,51 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
// Package comparer, file bytes_comparer.go. This file depends on "bytes".

// bytesComparer orders keys byte-wise, exactly like bytes.Compare.
type bytesComparer struct{}

// Compare returns the result of bytes.Compare(a, b).
func (bytesComparer) Compare(a, b []byte) int {
	return bytes.Compare(a, b)
}

// Name returns the fixed comparer name "leveldb.BytewiseComparator".
func (bytesComparer) Name() string {
	return "leveldb.BytewiseComparator"
}

// Separator appends to dst a key that is shorter than a and lies between a
// and b, and returns the extended slice. The key is the common prefix of a
// and b plus the first differing byte of a, incremented by one.
//
// It returns nil when no such key is produced: one input is a prefix of the
// other, the differing byte of a is 0xff, or incrementing it would reach or
// pass the corresponding byte of b.
func (bytesComparer) Separator(dst, a, b []byte) []byte {
	shared := len(a)
	if len(b) < shared {
		shared = len(b)
	}

	pos := 0
	for pos < shared && a[pos] == b[pos] {
		pos++
	}
	if pos >= shared {
		// Do not shorten if one string is a prefix of the other.
		return nil
	}

	if ch := a[pos]; ch != 0xff && ch+1 < b[pos] {
		out := append(dst, a[:pos+1]...)
		out[len(out)-1]++
		return out
	}
	return nil
}

// Successor appends to dst a short key that sorts at or after b and returns
// the extended slice. The key is b truncated after its first byte that is
// not 0xff, with that byte incremented by one. It returns nil when b is
// empty or consists only of 0xff bytes.
func (bytesComparer) Successor(dst, b []byte) []byte {
	for idx := 0; idx < len(b); idx++ {
		if b[idx] == 0xff {
			continue
		}
		out := append(dst, b[:idx+1]...)
		out[len(out)-1]++
		return out
	}
	return nil
}

// DefaultComparer is the default implementation of the Comparer interface.
// It uses the natural byte-wise ordering, consistent with bytes.Compare.
var DefaultComparer = bytesComparer{}

// diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/comparer/comparer.go b/vendor/github.com/syndtr/goleveldb/leveldb/comparer/comparer.go
// new file mode 100644
// index 0000000000..2c522db23b
// --- /dev/null
// +++ b/vendor/github.com/syndtr/goleveldb/leveldb/comparer/comparer.go
// @@ -0,0 +1,57 @@
//
// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Package comparer provides interface and implementation for ordering
// sets of data.

// BasicComparer is the interface that wraps the basic Compare method.
type BasicComparer interface {
	// Compare returns -1, 0, or +1 depending on whether a is 'less than',
	// 'equal to' or 'greater than' b. Two arguments may only compare as
	// 'equal' when their contents are exactly equal, and the empty slice
	// must be 'less than' any non-empty slice.
	Compare(a, b []byte) int
}

// Comparer defines a total ordering over the space of []byte keys: a 'less
// than' relationship.
type Comparer interface {
	BasicComparer

	// Name returns the name of the comparer.
	//
	// The LevelDB on-disk format stores the comparer name, and opening a
	// database with a different comparer from the one it was created with
	// results in an error.
	//
	// An implementation should switch to a new name whenever the comparer
	// implementation changes in a way that would change the relative
	// ordering of any two keys.
	//
	// Names starting with "leveldb." are reserved and should not be used
	// by any users of this package.
	Name() string

	// Below are advanced functions used to reduce the space requirements
	// for internal data structures such as index blocks.

	// Separator appends a sequence of bytes x to dst such that
	// a <= x && x < b, where 'less than' is consistent with Compare. An
	// implementation should return nil if x is equal to a.
	//
	// The contents of a and b must not be modified in any way. Doing so
	// may corrupt internal state.
	Separator(dst, a, b []byte) []byte

	// Successor appends a sequence of bytes x to dst such that x >= b,
	// where 'less than' is consistent with Compare. An implementation
	// should return nil if x is equal to b.
	//
	// The contents of b must not be modified in any way. Doing so may
	// corrupt internal state.
	Successor(dst, b []byte) []byte
}

// diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/db.go b/vendor/github.com/syndtr/goleveldb/leveldb/db.go
// new file mode 100644
// index 0000000000..b27c38d37e
// --- /dev/null
// +++ b/vendor/github.com/syndtr/goleveldb/leveldb/db.go
// @@ -0,0 +1,1175 @@
//
// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

package leveldb

import (
	"container/list"
	"fmt"
	"io"
	"os"
	"runtime"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	"github.com/syndtr/goleveldb/leveldb/errors"
	"github.com/syndtr/goleveldb/leveldb/iterator"
	"github.com/syndtr/goleveldb/leveldb/journal"
	"github.com/syndtr/goleveldb/leveldb/memdb"
	"github.com/syndtr/goleveldb/leveldb/opt"
	"github.com/syndtr/goleveldb/leveldb/storage"
	"github.com/syndtr/goleveldb/leveldb/table"
	"github.com/syndtr/goleveldb/leveldb/util"
)

// DB is a LevelDB database.
//
// The field order at the top of the struct is deliberate: the fields that
// are marked as needing 64-bit alignment come first. Do not reorder them.
type DB struct {
	// Need 64-bit alignment.
	seq uint64

	// Stats. Need 64-bit alignment.
	cWriteDelay            int64 // The cumulative duration of write delays
	cWriteDelayN           int32 // The cumulative number of write delays
	inWritePaused          int32 // The indicator whether write operation is paused by compaction
	aliveSnaps, aliveIters int32

	// Session.
	s *session

	// MemDB.
	memMu           sync.RWMutex
	memPool         chan *memdb.DB
	mem, frozenMem  *memDB
	journal         *journal.Writer
	journalWriter   storage.Writer
	journalFd       storage.FileDesc
	frozenJournalFd storage.FileDesc
	frozenSeq       uint64

	// Snapshot.
	snapsMu   sync.Mutex
	snapsList *list.List

	// Write.
	batchPool    sync.Pool
	writeMergeC  chan writeMerge
	writeMergedC chan bool
	writeLockC   chan struct{}
	writeAckC    chan error
	writeDelay   time.Duration
	writeDelayN  int
	tr           *Transaction

	// Compaction.
	compCommitLk     sync.Mutex
	tcompCmdC        chan cCmd
	tcompPauseC      chan chan<- struct{}
	mcompCmdC        chan cCmd
	compErrC         chan error
	compPerErrC      chan error
	compErrSetC      chan error
	compWriteLocking bool
	compStats        cStats
	memdbMaxLevel    int // For testing.

	// Close.
	closeW sync.WaitGroup
	closeC chan struct{}
	closed uint32
	closer io.Closer
}

// openDB builds a DB on top of an already recovered or created session.
//
// It replays the journals (using the read-only variant when the session
// options request read-only mode), removes obsolete files when writable,
// starts the background goroutines and installs a finalizer that calls
// Close. On error it returns a nil DB.
func openDB(s *session) (*DB, error) {
	s.log("db@open opening")
	start := time.Now()
	db := &DB{
		s: s,
		// Initial sequence
		seq: s.stSeqNum,
		// MemDB
		memPool: make(chan *memdb.DB, 1),
		// Snapshot
		snapsList: list.New(),
		// Write
		batchPool:    sync.Pool{New: newBatch},
		writeMergeC:  make(chan writeMerge),
		writeMergedC: make(chan bool),
		writeLockC:   make(chan struct{}, 1),
		writeAckC:    make(chan error),
		// Compaction
		tcompCmdC:   make(chan cCmd),
		tcompPauseC: make(chan chan<- struct{}),
		mcompCmdC:   make(chan cCmd),
		compErrC:    make(chan error),
		compPerErrC: make(chan error),
		compErrSetC: make(chan error),
		// Close
		closeC: make(chan struct{}),
	}

	// Read-only mode.
	readOnly := s.o.GetReadOnly()

	if readOnly {
		// Recover journals (read-only mode).
		if err := db.recoverJournalRO(); err != nil {
			return nil, err
		}
	} else {
		// Recover journals.
		if err := db.recoverJournal(); err != nil {
			return nil, err
		}

		// Remove any obsolete files.
		if err := db.checkAndCleanFiles(); err != nil {
			// Close journal.
			if db.journal != nil {
				db.journal.Close()
				db.journalWriter.Close()
			}
			return nil, err
		}

	}

	// Doesn't need to be included in the wait group.
	go db.compactionError()
	go db.mpoolDrain()

	if readOnly {
		db.SetReadOnly()
	} else {
		// Only the two compaction goroutines are tracked by closeW.
		db.closeW.Add(2)
		go db.tCompaction()
		go db.mCompaction()
		// go db.jWriter()
	}

	s.logf("db@open done T·%v", time.Since(start))

	runtime.SetFinalizer(db, (*DB).Close)
	return db, nil
}

// Open opens or creates a DB for the given storage.
// The DB will be created if not exist, unless ErrorIfMissing is true.
// Also, if ErrorIfExist is true and the DB exist Open will returns
// os.ErrExist error.
//
// Open will return an error with type of ErrCorrupted if corruption
// detected in the DB.
Use errors.IsCorrupted to test whether an error is +// due to corruption. Corrupted DB can be recovered with Recover function. +// +// The returned DB instance is safe for concurrent use. +// The DB must be closed after use, by calling Close method. +func Open(stor storage.Storage, o *opt.Options) (db *DB, err error) { + s, err := newSession(stor, o) + if err != nil { + return + } + defer func() { + if err != nil { + s.close() + s.release() + } + }() + + err = s.recover() + if err != nil { + if !os.IsNotExist(err) || s.o.GetErrorIfMissing() || s.o.GetReadOnly() { + return + } + err = s.create() + if err != nil { + return + } + } else if s.o.GetErrorIfExist() { + err = os.ErrExist + return + } + + return openDB(s) +} + +// OpenFile opens or creates a DB for the given path. +// The DB will be created if not exist, unless ErrorIfMissing is true. +// Also, if ErrorIfExist is true and the DB exist OpenFile will returns +// os.ErrExist error. +// +// OpenFile uses standard file-system backed storage implementation as +// described in the leveldb/storage package. +// +// OpenFile will return an error with type of ErrCorrupted if corruption +// detected in the DB. Use errors.IsCorrupted to test whether an error is +// due to corruption. Corrupted DB can be recovered with Recover function. +// +// The returned DB instance is safe for concurrent use. +// The DB must be closed after use, by calling Close method. +func OpenFile(path string, o *opt.Options) (db *DB, err error) { + stor, err := storage.OpenFile(path, o.GetReadOnly()) + if err != nil { + return + } + db, err = Open(stor, o) + if err != nil { + stor.Close() + } else { + db.closer = stor + } + return +} + +// Recover recovers and opens a DB with missing or corrupted manifest files +// for the given storage. It will ignore any manifest files, valid or not. +// The DB must already exist or it will returns an error. +// Also, Recover will ignore ErrorIfMissing and ErrorIfExist options. 
//
// The returned DB instance is safe for concurrent use.
// The DB must be closed after use, by calling Close method.
func Recover(stor storage.Storage, o *opt.Options) (db *DB, err error) {
	s, err := newSession(stor, o)
	if err != nil {
		return
	}
	// Tear the session down again if recovery or opening fails.
	defer func() {
		if err != nil {
			s.close()
			s.release()
		}
	}()

	err = recoverTable(s, o)
	if err != nil {
		return
	}
	return openDB(s)
}

// RecoverFile recovers and opens a DB with missing or corrupted manifest files
// for the given path. It will ignore any manifest files, valid or not.
// The DB must already exist or it will return an error.
// Also, Recover will ignore ErrorIfMissing and ErrorIfExist options.
//
// RecoverFile uses standard file-system backed storage implementation as described
// in the leveldb/storage package.
//
// The returned DB instance is safe for concurrent use.
// The DB must be closed after use, by calling Close method.
func RecoverFile(path string, o *opt.Options) (db *DB, err error) {
	stor, err := storage.OpenFile(path, false)
	if err != nil {
		return
	}
	db, err = Recover(stor, o)
	if err != nil {
		stor.Close()
	} else {
		db.closer = stor
	}
	return
}

// recoverTable rebuilds the table list of session s from the table files
// found in storage, then creates a new manifest and commits the result.
//
// Every table file is scanned. Tables containing corrupted keys or blocks
// are dropped when opt.StrictRecovery is set, and otherwise rewritten with
// only their valid entries. Tables with no valid key are left out. All
// recovered tables are added at level 0, and the session sequence number is
// set to the highest sequence number seen.
func recoverTable(s *session, o *opt.Options) error {
	o = dupOptions(o)
	// Mask StrictReader, lets StrictRecovery doing its job.
	o.Strict &= ^opt.StrictReader

	// Get all tables and sort it by file number.
	fds, err := s.stor.List(storage.TypeTable)
	if err != nil {
		return err
	}
	sortFds(fds)

	var (
		maxSeq                                                            uint64
		recoveredKey, goodKey, corruptedKey, corruptedBlock, droppedTable int

		// We will drop corrupted table.
		strict = o.GetStrict(opt.StrictRecovery)
		noSync = o.GetNoSync()

		rec   = &sessionRecord{}
		bpool = util.NewBufferPool(o.GetBlockSize() + 5)
	)
	// buildTable copies every entry of iter that has a valid internal key
	// into a new temporary table file and returns that file and its size.
	// On failure the temporary file is removed and a zero FileDesc returned.
	buildTable := func(iter iterator.Iterator) (tmpFd storage.FileDesc, size int64, err error) {
		tmpFd = s.newTemp()
		writer, err := s.stor.Create(tmpFd)
		if err != nil {
			return
		}
		defer func() {
			writer.Close()
			if err != nil {
				s.stor.Remove(tmpFd)
				tmpFd = storage.FileDesc{}
			}
		}()

		// Copy entries.
		tw := table.NewWriter(writer, o)
		for iter.Next() {
			key := iter.Key()
			if validInternalKey(key) {
				err = tw.Append(key, iter.Value())
				if err != nil {
					return
				}
			}
		}
		// Corruption errors are tolerated here; anything else aborts.
		err = iter.Error()
		if err != nil && !errors.IsCorrupted(err) {
			return
		}
		err = tw.Close()
		if err != nil {
			return
		}
		if !noSync {
			err = writer.Sync()
			if err != nil {
				return
			}
		}
		size = int64(tw.BytesLen())
		return
	}
	// recoverTable (the closure, shadowing the enclosing function's name)
	// scans one table file and either records it in rec, rebuilds it first,
	// or drops it.
	recoverTable := func(fd storage.FileDesc) error {
		s.logf("table@recovery recovering @%d", fd.Num)
		reader, err := s.stor.Open(fd)
		if err != nil {
			return err
		}
		// closed is set once the reader has been closed explicitly (before
		// the rename below), so the deferred close does not run twice.
		var closed bool
		defer func() {
			if !closed {
				reader.Close()
			}
		}()

		// Get file size.
		size, err := reader.Seek(0, 2)
		if err != nil {
			return err
		}

		var (
			tSeq                                     uint64
			tgoodKey, tcorruptedKey, tcorruptedBlock int
			imin, imax                               []byte
		)
		tr, err := table.NewReader(reader, size, fd, nil, bpool, o)
		if err != nil {
			return err
		}
		iter := tr.NewIterator(nil, nil)
		// Count corrupted blocks reported by the iterator, when it supports
		// an error callback.
		if itererr, ok := iter.(iterator.ErrorCallbackSetter); ok {
			itererr.SetErrorCallback(func(err error) {
				if errors.IsCorrupted(err) {
					s.logf("table@recovery block corruption @%d %q", fd.Num, err)
					tcorruptedBlock++
				}
			})
		}

		// Scan the table.
		for iter.Next() {
			key := iter.Key()
			_, seq, _, kerr := parseInternalKey(key)
			if kerr != nil {
				tcorruptedKey++
				continue
			}
			tgoodKey++
			if seq > tSeq {
				tSeq = seq
			}
			// imin keeps a copy of the first valid key, imax of the last.
			if imin == nil {
				imin = append([]byte{}, key...)
			}
			imax = append(imax[:0], key...)
		}
		if err := iter.Error(); err != nil && !errors.IsCorrupted(err) {
			iter.Release()
			return err
		}
		iter.Release()

		goodKey += tgoodKey
		corruptedKey += tcorruptedKey
		corruptedBlock += tcorruptedBlock

		if strict && (tcorruptedKey > 0 || tcorruptedBlock > 0) {
			droppedTable++
			s.logf("table@recovery dropped @%d Gk·%d Ck·%d Cb·%d S·%d Q·%d", fd.Num, tgoodKey, tcorruptedKey, tcorruptedBlock, size, tSeq)
			return nil
		}

		if tgoodKey > 0 {
			if tcorruptedKey > 0 || tcorruptedBlock > 0 {
				// Rebuild the table.
				s.logf("table@recovery rebuilding @%d", fd.Num)
				iter := tr.NewIterator(nil, nil)
				tmpFd, newSize, err := buildTable(iter)
				iter.Release()
				if err != nil {
					return err
				}
				// Close the old file before replacing it with the rebuilt one.
				closed = true
				reader.Close()
				if err := s.stor.Rename(tmpFd, fd); err != nil {
					return err
				}
				size = newSize
			}
			if tSeq > maxSeq {
				maxSeq = tSeq
			}
			recoveredKey += tgoodKey
			// Add table to level 0.
			rec.addTable(0, fd.Num, size, imin, imax)
			s.logf("table@recovery recovered @%d Gk·%d Ck·%d Cb·%d S·%d Q·%d", fd.Num, tgoodKey, tcorruptedKey, tcorruptedBlock, size, tSeq)
		} else {
			droppedTable++
			s.logf("table@recovery unrecoverable @%d Ck·%d Cb·%d S·%d", fd.Num, tcorruptedKey, tcorruptedBlock, size)
		}

		return nil
	}

	// Recover all tables.
	if len(fds) > 0 {
		s.logf("table@recovery F·%d", len(fds))

		// Mark file number as used.
		s.markFileNum(fds[len(fds)-1].Num)

		for _, fd := range fds {
			if err := recoverTable(fd); err != nil {
				return err
			}
		}

		s.logf("table@recovery recovered F·%d N·%d Gk·%d Ck·%d Q·%d", len(fds), recoveredKey, goodKey, corruptedKey, maxSeq)
	}

	// Set sequence number.
	rec.setSeqNum(maxSeq)

	// Create new manifest.
	if err := s.create(); err != nil {
		return err
	}

	// Commit.
	return s.commit(rec)
}

// recoverJournal replays the journal files that are still relevant to the
// session (number >= stJournalNum, or equal to stPrevJournalNum) and flushes
// their contents through db.s.flushMemdb. It then creates a fresh memdb and
// journal via db.newMem and commits the resulting session record.
//
// A fully replayed journal is removed once its contents have been committed.
func (db *DB) recoverJournal() error {
	// Get all journals and sort it by file number.
	rawFds, err := db.s.stor.List(storage.TypeJournal)
	if err != nil {
		return err
	}
	sortFds(rawFds)

	// Journals that will be recovered.
	var fds []storage.FileDesc
	for _, fd := range rawFds {
		if fd.Num >= db.s.stJournalNum || fd.Num == db.s.stPrevJournalNum {
			fds = append(fds, fd)
		}
	}

	var (
		ofd storage.FileDesc // Obsolete file.
		rec = &sessionRecord{}
	)

	// Recover journals.
	if len(fds) > 0 {
		db.logf("journal@recovery F·%d", len(fds))

		// Mark file number as used.
		db.s.markFileNum(fds[len(fds)-1].Num)

		var (
			// Options.
			strict      = db.s.o.GetStrict(opt.StrictJournal)
			checksum    = db.s.o.GetStrict(opt.StrictJournalChecksum)
			writeBuffer = db.s.o.GetWriteBuffer()

			jr       *journal.Reader
			mdb      = memdb.New(db.s.icmp, writeBuffer)
			buf      = &util.Buffer{}
			batchSeq uint64
			batchLen int
		)

		for _, fd := range fds {
			db.logf("journal@recovery recovering @%d", fd.Num)

			fr, err := db.s.stor.Open(fd)
			if err != nil {
				return err
			}

			// Create or reset journal reader instance.
			if jr == nil {
				jr = journal.NewReader(fr, dropper{db.s, fd}, strict, checksum)
			} else {
				jr.Reset(fr, dropper{db.s, fd}, strict, checksum)
			}

			// Flush memdb and remove obsolete journal file.
			if !ofd.Zero() {
				if mdb.Len() > 0 {
					if _, err := db.s.flushMemdb(rec, mdb, 0); err != nil {
						fr.Close()
						return err
					}
				}

				rec.setJournalNum(fd.Num)
				rec.setSeqNum(db.seq)
				if err := db.s.commit(rec); err != nil {
					fr.Close()
					return err
				}
				rec.resetAddedTables()

				db.s.stor.Remove(ofd)
				ofd = storage.FileDesc{}
			}

			// Replay journal to memdb.
			mdb.Reset()
			for {
				r, err := jr.Next()
				if err != nil {
					if err == io.EOF {
						break
					}

					fr.Close()
					return errors.SetFd(err, fd)
				}

				buf.Reset()
				if _, err := buf.ReadFrom(r); err != nil {
					if err == io.ErrUnexpectedEOF {
						// This is error returned due to corruption, with strict == false.
						continue
					}

					fr.Close()
					return errors.SetFd(err, fd)
				}
				batchSeq, batchLen, err = decodeBatchToMem(buf.Bytes(), db.seq, mdb)
				if err != nil {
					if !strict && errors.IsCorrupted(err) {
						db.s.logf("journal error: %v (skipped)", err)
						// We won't apply sequence number as it might be corrupted.
						continue
					}

					fr.Close()
					return errors.SetFd(err, fd)
				}

				// Save sequence number.
				db.seq = batchSeq + uint64(batchLen)

				// Flush it if large enough.
				if mdb.Size() >= writeBuffer {
					if _, err := db.s.flushMemdb(rec, mdb, 0); err != nil {
						fr.Close()
						return err
					}

					mdb.Reset()
				}
			}

			fr.Close()
			// This journal becomes obsolete once the next commit succeeds.
			ofd = fd
		}

		// Flush the last memdb.
		if mdb.Len() > 0 {
			if _, err := db.s.flushMemdb(rec, mdb, 0); err != nil {
				return err
			}
		}
	}

	// Create a new journal.
	if _, err := db.newMem(0); err != nil {
		return err
	}

	// Commit.
	rec.setJournalNum(db.journalFd.Num)
	rec.setSeqNum(db.seq)
	if err := db.s.commit(rec); err != nil {
		// Close journal on error.
		if db.journal != nil {
			db.journal.Close()
			db.journalWriter.Close()
		}
		return err
	}

	// Remove the last obsolete journal file.
	if !ofd.Zero() {
		db.s.stor.Remove(ofd)
	}

	return nil
}

// recoverJournalRO is the read-only counterpart of recoverJournal. It
// replays the same set of journal files into a single memdb, which becomes
// db.mem. It flushes nothing, commits nothing, creates no journal and
// removes no file.
func (db *DB) recoverJournalRO() error {
	// Get all journals and sort it by file number.
	rawFds, err := db.s.stor.List(storage.TypeJournal)
	if err != nil {
		return err
	}
	sortFds(rawFds)

	// Journals that will be recovered.
	var fds []storage.FileDesc
	for _, fd := range rawFds {
		if fd.Num >= db.s.stJournalNum || fd.Num == db.s.stPrevJournalNum {
			fds = append(fds, fd)
		}
	}

	var (
		// Options.
		strict      = db.s.o.GetStrict(opt.StrictJournal)
		checksum    = db.s.o.GetStrict(opt.StrictJournalChecksum)
		writeBuffer = db.s.o.GetWriteBuffer()

		mdb = memdb.New(db.s.icmp, writeBuffer)
	)

	// Recover journals.
	if len(fds) > 0 {
		db.logf("journal@recovery RO·Mode F·%d", len(fds))

		var (
			jr       *journal.Reader
			buf      = &util.Buffer{}
			batchSeq uint64
			batchLen int
		)

		for _, fd := range fds {
			db.logf("journal@recovery recovering @%d", fd.Num)

			fr, err := db.s.stor.Open(fd)
			if err != nil {
				return err
			}

			// Create or reset journal reader instance.
			if jr == nil {
				jr = journal.NewReader(fr, dropper{db.s, fd}, strict, checksum)
			} else {
				jr.Reset(fr, dropper{db.s, fd}, strict, checksum)
			}

			// Replay journal to memdb.
			for {
				r, err := jr.Next()
				if err != nil {
					if err == io.EOF {
						break
					}

					fr.Close()
					return errors.SetFd(err, fd)
				}

				buf.Reset()
				if _, err := buf.ReadFrom(r); err != nil {
					if err == io.ErrUnexpectedEOF {
						// This is error returned due to corruption, with strict == false.
						continue
					}

					fr.Close()
					return errors.SetFd(err, fd)
				}
				batchSeq, batchLen, err = decodeBatchToMem(buf.Bytes(), db.seq, mdb)
				if err != nil {
					if !strict && errors.IsCorrupted(err) {
						db.s.logf("journal error: %v (skipped)", err)
						// We won't apply sequence number as it might be corrupted.
						continue
					}

					fr.Close()
					return errors.SetFd(err, fd)
				}

				// Save sequence number.
				db.seq = batchSeq + uint64(batchLen)
			}

			fr.Close()
		}
	}

	// Set memDB.
	db.mem = &memDB{db: db, DB: mdb, ref: 1}

	return nil
}

// memGet looks ikey up in mdb.
//
// ok is true when the memdb settles the lookup: the entry found has the same
// user key as ikey, or the lookup failed with an error other than
// ErrNotFound. If the matching entry is a deletion (keyTypeDel), err is
// ErrNotFound. ok is false when the memdb has nothing for this user key and
// the caller should look elsewhere.
func memGet(mdb *memdb.DB, ikey internalKey, icmp *iComparer) (ok bool, mv []byte, err error) {
	mk, mv, err := mdb.Find(ikey)
	if err == nil {
		ukey, _, kt, kerr := parseInternalKey(mk)
		if kerr != nil {
			// Shouldn't have happened.
			panic(kerr)
		}
		if icmp.uCompare(ukey, ikey.ukey()) == 0 {
			if kt == keyTypeDel {
				return true, nil, ErrNotFound
			}
			return true, mv, nil

		}
	} else if err != ErrNotFound {
		return true, nil, err
	}
	return
}

// get returns the value stored for key as visible at sequence number seq.
//
// Lookup order: the auxiliary memdb auxm (if not nil), the memdbs returned
// by db.getMems, then the current version's tables (together with auxt).
// Values found in a memdb are returned as a copy. When the version lookup
// asks for it, a table compaction is triggered.
func (db *DB) get(auxm *memdb.DB, auxt tFiles, key []byte, seq uint64, ro *opt.ReadOptions) (value []byte, err error) {
	ikey := makeInternalKey(nil, key, seq, keyTypeSeek)

	if auxm != nil {
		if ok, mv, me := memGet(auxm, ikey, db.s.icmp); ok {
			return append([]byte{}, mv...), me
		}
	}

	em, fm := db.getMems()
	for _, m := range [...]*memDB{em, fm} {
		if m == nil {
			continue
		}
		// Deferred on purpose: both references are dropped on return.
		defer m.decref()

		if ok, mv, me := memGet(m.DB, ikey, db.s.icmp); ok {
			return append([]byte{}, mv...), me
		}
	}

	v := db.s.version()
	value, cSched, err := v.get(auxt, ikey, ro, false)
	v.release()
	if cSched {
		// Trigger table compaction.
		db.compTrigger(db.tcompCmdC)
	}
	return
}

// nilIfNotFound maps ErrNotFound to nil and returns any other error
// unchanged.
func nilIfNotFound(err error) error {
	if err == ErrNotFound {
		return nil
	}
	return err
}

// has reports whether key exists as visible at sequence number seq. It uses
// the same lookup order as get. A missing key yields (false, nil) rather
// than ErrNotFound.
func (db *DB) has(auxm *memdb.DB, auxt tFiles, key []byte, seq uint64, ro *opt.ReadOptions) (ret bool, err error) {
	ikey := makeInternalKey(nil, key, seq, keyTypeSeek)

	if auxm != nil {
		if ok, _, me := memGet(auxm, ikey, db.s.icmp); ok {
			return me == nil, nilIfNotFound(me)
		}
	}

	em, fm := db.getMems()
	for _, m := range [...]*memDB{em, fm} {
		if m == nil {
			continue
		}
		// Deferred on purpose: both references are dropped on return.
		defer m.decref()

		if ok, _, me := memGet(m.DB, ikey, db.s.icmp); ok {
			return me == nil, nilIfNotFound(me)
		}
	}

	v := db.s.version()
	_, cSched, err := v.get(auxt, ikey, ro, true)
	v.release()
	if cSched {
		// Trigger table compaction.
		db.compTrigger(db.tcompCmdC)
	}
	if err == nil {
		ret = true
	} else if err == ErrNotFound {
		err = nil
	}
	return
}

// Get gets the value for the given key. It returns ErrNotFound if the
// DB does not contains the key.
//
// The returned slice is its own copy, it is safe to modify the contents
// of the returned slice.
// It is safe to modify the contents of the argument after Get returns.
func (db *DB) Get(key []byte, ro *opt.ReadOptions) (value []byte, err error) {
	err = db.ok()
	if err != nil {
		return
	}

	// Read at the sequence number of a snapshot taken now.
	se := db.acquireSnapshot()
	defer db.releaseSnapshot(se)
	return db.get(nil, nil, key, se.seq, ro)
}

// Has returns true if the DB contains the given key.
//
// It is safe to modify the contents of the argument after Has returns.
func (db *DB) Has(key []byte, ro *opt.ReadOptions) (ret bool, err error) {
	err = db.ok()
	if err != nil {
		return
	}

	// Read at the sequence number of a snapshot taken now.
	se := db.acquireSnapshot()
	defer db.releaseSnapshot(se)
	return db.has(nil, nil, key, se.seq, ro)
}

// NewIterator returns an iterator for the latest snapshot of the
// underlying DB.
// The returned iterator is not safe for concurrent use, but it is safe to use
// multiple iterators concurrently, with each in a dedicated goroutine.
// It is also safe to use an iterator concurrently with modifying its
// underlying DB. The resultant key/value pairs are guaranteed to be
// consistent.
//
// Slice allows slicing the iterator to only contains keys in the given
// range. A nil Range.Start is treated as a key before all keys in the
// DB. And a nil Range.Limit is treated as a key after all keys in
// the DB.
//
// The iterator must be released after use, by calling Release method.
// If the DB is not usable (db.ok fails), an empty iterator carrying that
// error is returned.
//
// Also read Iterator documentation of the leveldb/iterator package.
func (db *DB) NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator {
	if err := db.ok(); err != nil {
		return iterator.NewEmptyIterator(err)
	}

	se := db.acquireSnapshot()
	defer db.releaseSnapshot(se)
	// Iterator holds 'version' lock, 'version' is immutable so snapshot
	// can be released after iterator created.
	return db.newIterator(nil, nil, se.seq, slice, ro)
}

// GetSnapshot returns a latest snapshot of the underlying DB. A snapshot
// is a frozen snapshot of a DB state at a particular point in time. The
// content of snapshot are guaranteed to be consistent.
//
// The snapshot must be released after use, by calling Release method.
func (db *DB) GetSnapshot() (*Snapshot, error) {
	if err := db.ok(); err != nil {
		return nil, err
	}

	return db.newSnapshot(), nil
}

// GetProperty returns value of the given property name. It returns
// ErrNotFound for names that do not start with "leveldb." or that are not
// listed below.
//
// Property names:
//	leveldb.num-files-at-level{n}
//		Returns the number of files at level 'n'.
//	leveldb.stats
//		Returns statistics of the underlying DB.
//	leveldb.iostats
//		Returns statistics of effective disk read and write.
//	leveldb.writedelay
//		Returns cumulative write delay caused by compaction.
//	leveldb.sstables
//		Returns sstables list for each level.
//	leveldb.blockpool
//		Returns block pool stats.
//	leveldb.cachedblock
//		Returns size of cached block.
//	leveldb.openedtables
//		Returns number of opened tables.
//	leveldb.alivesnaps
//		Returns number of alive snapshots.
//	leveldb.aliveiters
//		Returns number of alive iterators.
func (db *DB) GetProperty(name string) (value string, err error) {
	err = db.ok()
	if err != nil {
		return
	}

	const prefix = "leveldb."
	if !strings.HasPrefix(name, prefix) {
		return "", ErrNotFound
	}
	p := name[len(prefix):]

	v := db.s.version()
	defer v.release()

	numFilesPrefix := "num-files-at-level"
	switch {
	case strings.HasPrefix(p, numFilesPrefix):
		var level uint
		var rest string
		// Exactly one item must scan: a level number with nothing after it.
		n, _ := fmt.Sscanf(p[len(numFilesPrefix):], "%d%s", &level, &rest)
		if n != 1 {
			err = ErrNotFound
		} else {
			value = fmt.Sprint(v.tLen(int(level)))
		}
	case p == "stats":
		// NOTE(review): the column spacing inside these literals looks
		// collapsed in this rendering of the file. Confirm against the real
		// source before relying on the exact layout.
		value = "Compactions\n" +
			" Level | Tables | Size(MB) | Time(sec) | Read(MB) | Write(MB)\n" +
			"-------+------------+---------------+---------------+---------------+---------------\n"
		for level, tables := range v.levels {
			duration, read, write := db.compStats.getStat(level)
			// Skip levels that hold no table and never saw a compaction.
			if len(tables) == 0 && duration == 0 {
				continue
			}
			value += fmt.Sprintf(" %3d | %10d | %13.5f | %13.5f | %13.5f | %13.5f\n",
				level, len(tables), float64(tables.size())/1048576.0, duration.Seconds(),
				float64(read)/1048576.0, float64(write)/1048576.0)
		}
	case p == "iostats":
		value = fmt.Sprintf("Read(MB):%.5f Write(MB):%.5f",
			float64(db.s.stor.reads())/1048576.0,
			float64(db.s.stor.writes())/1048576.0)
	case p == "writedelay":
		writeDelayN, writeDelay := atomic.LoadInt32(&db.cWriteDelayN), time.Duration(atomic.LoadInt64(&db.cWriteDelay))
		paused := atomic.LoadInt32(&db.inWritePaused) == 1
		value = fmt.Sprintf("DelayN:%d Delay:%s Paused:%t", writeDelayN, writeDelay, paused)
	case p == "sstables":
		for level, tables := range v.levels {
			value += fmt.Sprintf("--- level %d ---\n", level)
			for _, t := range tables {
				value += fmt.Sprintf("%d:%d[%q .. %q]\n", t.fd.Num, t.size, t.imin, t.imax)
			}
		}
	case p == "blockpool":
		value = fmt.Sprintf("%v", db.s.tops.bpool)
	case p == "cachedblock":
		if db.s.tops.bcache != nil {
			value = fmt.Sprintf("%d", db.s.tops.bcache.Size())
		} else {
			value = "<nil>"
		}
	case p == "openedtables":
		value = fmt.Sprintf("%d", db.s.tops.cache.Size())
	case p == "alivesnaps":
		value = fmt.Sprintf("%d", atomic.LoadInt32(&db.aliveSnaps))
	case p == "aliveiters":
		value = fmt.Sprintf("%d", atomic.LoadInt32(&db.aliveIters))
	default:
		err = ErrNotFound
	}

	return
}

// DBStats is database statistics, as filled in by DB.Stats.
//
// The Level* slices hold one element per reported level. Levels with no
// table and no recorded compaction time are skipped, so an index in these
// slices is not necessarily the level number.
type DBStats struct {
	WriteDelayCount    int32
	WriteDelayDuration time.Duration
	WritePaused        bool

	AliveSnapshots int32
	AliveIterators int32

	IOWrite uint64
	IORead  uint64

	BlockCacheSize    int
	OpenedTablesCount int

	LevelSizes        []int64
	LevelTablesCounts []int
	LevelRead         []int64
	LevelWrite        []int64
	LevelDurations    []time.Duration
}

// Stats populates s with database statistics.
+func (db *DB) Stats(s *DBStats) error { + err := db.ok() + if err != nil { + return err + } + + s.IORead = db.s.stor.reads() + s.IOWrite = db.s.stor.writes() + s.WriteDelayCount = atomic.LoadInt32(&db.cWriteDelayN) + s.WriteDelayDuration = time.Duration(atomic.LoadInt64(&db.cWriteDelay)) + s.WritePaused = atomic.LoadInt32(&db.inWritePaused) == 1 + + s.OpenedTablesCount = db.s.tops.cache.Size() + if db.s.tops.bcache != nil { + s.BlockCacheSize = db.s.tops.bcache.Size() + } else { + s.BlockCacheSize = 0 + } + + s.AliveIterators = atomic.LoadInt32(&db.aliveIters) + s.AliveSnapshots = atomic.LoadInt32(&db.aliveSnaps) + + s.LevelDurations = s.LevelDurations[:0] + s.LevelRead = s.LevelRead[:0] + s.LevelWrite = s.LevelWrite[:0] + s.LevelSizes = s.LevelSizes[:0] + s.LevelTablesCounts = s.LevelTablesCounts[:0] + + v := db.s.version() + defer v.release() + + for level, tables := range v.levels { + duration, read, write := db.compStats.getStat(level) + if len(tables) == 0 && duration == 0 { + continue + } + s.LevelDurations = append(s.LevelDurations, duration) + s.LevelRead = append(s.LevelRead, read) + s.LevelWrite = append(s.LevelWrite, write) + s.LevelSizes = append(s.LevelSizes, tables.size()) + s.LevelTablesCounts = append(s.LevelTablesCounts, len(tables)) + } + + return nil +} + +// SizeOf calculates approximate sizes of the given key ranges. +// The length of the returned sizes are equal with the length of the given +// ranges. The returned sizes measure storage space usage, so if the user +// data compresses by a factor of ten, the returned sizes will be one-tenth +// the size of the corresponding user data size. +// The results may not include the sizes of recently written data. 
+func (db *DB) SizeOf(ranges []util.Range) (Sizes, error) { + if err := db.ok(); err != nil { + return nil, err + } + + v := db.s.version() + defer v.release() + + sizes := make(Sizes, 0, len(ranges)) + for _, r := range ranges { + imin := makeInternalKey(nil, r.Start, keyMaxSeq, keyTypeSeek) + imax := makeInternalKey(nil, r.Limit, keyMaxSeq, keyTypeSeek) + start, err := v.offsetOf(imin) + if err != nil { + return nil, err + } + limit, err := v.offsetOf(imax) + if err != nil { + return nil, err + } + var size int64 + if limit >= start { + size = limit - start + } + sizes = append(sizes, size) + } + + return sizes, nil +} + +// Close closes the DB. This will also releases any outstanding snapshot, +// abort any in-flight compaction and discard open transaction. +// +// It is not safe to close a DB until all outstanding iterators are released. +// It is valid to call Close multiple times. Other methods should not be +// called after the DB has been closed. +func (db *DB) Close() error { + if !db.setClosed() { + return ErrClosed + } + + start := time.Now() + db.log("db@close closing") + + // Clear the finalizer. + runtime.SetFinalizer(db, nil) + + // Get compaction error. + var err error + select { + case err = <-db.compErrC: + if err == ErrReadOnly { + err = nil + } + default: + } + + // Signal all goroutines. + close(db.closeC) + + // Discard open transaction. + if db.tr != nil { + db.tr.Discard() + } + + // Acquire writer lock. + db.writeLockC <- struct{}{} + + // Wait for all gorotines to exit. + db.closeW.Wait() + + // Closes journal. + if db.journal != nil { + db.journal.Close() + db.journalWriter.Close() + db.journal = nil + db.journalWriter = nil + } + + if db.writeDelayN > 0 { + db.logf("db@write was delayed N·%d T·%v", db.writeDelayN, db.writeDelay) + } + + // Close session. 
+ db.s.close() + db.logf("db@close done T·%v", time.Since(start)) + db.s.release() + + if db.closer != nil { + if err1 := db.closer.Close(); err == nil { + err = err1 + } + db.closer = nil + } + + // Clear memdbs. + db.clearMems() + + return err +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/db_compaction.go b/vendor/github.com/syndtr/goleveldb/leveldb/db_compaction.go new file mode 100644 index 0000000000..0c1b9a53b8 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/db_compaction.go @@ -0,0 +1,854 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "sync" + "time" + + "github.com/syndtr/goleveldb/leveldb/errors" + "github.com/syndtr/goleveldb/leveldb/opt" + "github.com/syndtr/goleveldb/leveldb/storage" +) + +var ( + errCompactionTransactExiting = errors.New("leveldb: compaction transact exiting") +) + +type cStat struct { + duration time.Duration + read int64 + write int64 +} + +func (p *cStat) add(n *cStatStaging) { + p.duration += n.duration + p.read += n.read + p.write += n.write +} + +func (p *cStat) get() (duration time.Duration, read, write int64) { + return p.duration, p.read, p.write +} + +type cStatStaging struct { + start time.Time + duration time.Duration + on bool + read int64 + write int64 +} + +func (p *cStatStaging) startTimer() { + if !p.on { + p.start = time.Now() + p.on = true + } +} + +func (p *cStatStaging) stopTimer() { + if p.on { + p.duration += time.Since(p.start) + p.on = false + } +} + +type cStats struct { + lk sync.Mutex + stats []cStat +} + +func (p *cStats) addStat(level int, n *cStatStaging) { + p.lk.Lock() + if level >= len(p.stats) { + newStats := make([]cStat, level+1) + copy(newStats, p.stats) + p.stats = newStats + } + p.stats[level].add(n) + p.lk.Unlock() +} + +func (p *cStats) getStat(level int) (duration time.Duration, 
read, write int64) { + p.lk.Lock() + defer p.lk.Unlock() + if level < len(p.stats) { + return p.stats[level].get() + } + return +} + +func (db *DB) compactionError() { + var err error +noerr: + // No error. + for { + select { + case err = <-db.compErrSetC: + switch { + case err == nil: + case err == ErrReadOnly, errors.IsCorrupted(err): + goto hasperr + default: + goto haserr + } + case <-db.closeC: + return + } + } +haserr: + // Transient error. + for { + select { + case db.compErrC <- err: + case err = <-db.compErrSetC: + switch { + case err == nil: + goto noerr + case err == ErrReadOnly, errors.IsCorrupted(err): + goto hasperr + default: + } + case <-db.closeC: + return + } + } +hasperr: + // Persistent error. + for { + select { + case db.compErrC <- err: + case db.compPerErrC <- err: + case db.writeLockC <- struct{}{}: + // Hold write lock, so that write won't pass-through. + db.compWriteLocking = true + case <-db.closeC: + if db.compWriteLocking { + // We should release the lock or Close will hang. + <-db.writeLockC + } + return + } + } +} + +type compactionTransactCounter int + +func (cnt *compactionTransactCounter) incr() { + *cnt++ +} + +type compactionTransactInterface interface { + run(cnt *compactionTransactCounter) error + revert() error +} + +func (db *DB) compactionTransact(name string, t compactionTransactInterface) { + defer func() { + if x := recover(); x != nil { + if x == errCompactionTransactExiting { + if err := t.revert(); err != nil { + db.logf("%s revert error %q", name, err) + } + } + panic(x) + } + }() + + const ( + backoffMin = 1 * time.Second + backoffMax = 8 * time.Second + backoffMul = 2 * time.Second + ) + var ( + backoff = backoffMin + backoffT = time.NewTimer(backoff) + lastCnt = compactionTransactCounter(0) + + disableBackoff = db.s.o.GetDisableCompactionBackoff() + ) + for n := 0; ; n++ { + // Check whether the DB is closed. 
+ if db.isClosed() { + db.logf("%s exiting", name) + db.compactionExitTransact() + } else if n > 0 { + db.logf("%s retrying N·%d", name, n) + } + + // Execute. + cnt := compactionTransactCounter(0) + err := t.run(&cnt) + if err != nil { + db.logf("%s error I·%d %q", name, cnt, err) + } + + // Set compaction error status. + select { + case db.compErrSetC <- err: + case perr := <-db.compPerErrC: + if err != nil { + db.logf("%s exiting (persistent error %q)", name, perr) + db.compactionExitTransact() + } + case <-db.closeC: + db.logf("%s exiting", name) + db.compactionExitTransact() + } + if err == nil { + return + } + if errors.IsCorrupted(err) { + db.logf("%s exiting (corruption detected)", name) + db.compactionExitTransact() + } + + if !disableBackoff { + // Reset backoff duration if counter is advancing. + if cnt > lastCnt { + backoff = backoffMin + lastCnt = cnt + } + + // Backoff. + backoffT.Reset(backoff) + if backoff < backoffMax { + backoff *= backoffMul + if backoff > backoffMax { + backoff = backoffMax + } + } + select { + case <-backoffT.C: + case <-db.closeC: + db.logf("%s exiting", name) + db.compactionExitTransact() + } + } + } +} + +type compactionTransactFunc struct { + runFunc func(cnt *compactionTransactCounter) error + revertFunc func() error +} + +func (t *compactionTransactFunc) run(cnt *compactionTransactCounter) error { + return t.runFunc(cnt) +} + +func (t *compactionTransactFunc) revert() error { + if t.revertFunc != nil { + return t.revertFunc() + } + return nil +} + +func (db *DB) compactionTransactFunc(name string, run func(cnt *compactionTransactCounter) error, revert func() error) { + db.compactionTransact(name, &compactionTransactFunc{run, revert}) +} + +func (db *DB) compactionExitTransact() { + panic(errCompactionTransactExiting) +} + +func (db *DB) compactionCommit(name string, rec *sessionRecord) { + db.compCommitLk.Lock() + defer db.compCommitLk.Unlock() // Defer is necessary. 
+ db.compactionTransactFunc(name+"@commit", func(cnt *compactionTransactCounter) error { + return db.s.commit(rec) + }, nil) +} + +func (db *DB) memCompaction() { + mdb := db.getFrozenMem() + if mdb == nil { + return + } + defer mdb.decref() + + db.logf("memdb@flush N·%d S·%s", mdb.Len(), shortenb(mdb.Size())) + + // Don't compact empty memdb. + if mdb.Len() == 0 { + db.logf("memdb@flush skipping") + // drop frozen memdb + db.dropFrozenMem() + return + } + + // Pause table compaction. + resumeC := make(chan struct{}) + select { + case db.tcompPauseC <- (chan<- struct{})(resumeC): + case <-db.compPerErrC: + close(resumeC) + resumeC = nil + case <-db.closeC: + db.compactionExitTransact() + } + + var ( + rec = &sessionRecord{} + stats = &cStatStaging{} + flushLevel int + ) + + // Generate tables. + db.compactionTransactFunc("memdb@flush", func(cnt *compactionTransactCounter) (err error) { + stats.startTimer() + flushLevel, err = db.s.flushMemdb(rec, mdb.DB, db.memdbMaxLevel) + stats.stopTimer() + return + }, func() error { + for _, r := range rec.addedTables { + db.logf("memdb@flush revert @%d", r.num) + if err := db.s.stor.Remove(storage.FileDesc{Type: storage.TypeTable, Num: r.num}); err != nil { + return err + } + } + return nil + }) + + rec.setJournalNum(db.journalFd.Num) + rec.setSeqNum(db.frozenSeq) + + // Commit. + stats.startTimer() + db.compactionCommit("memdb", rec) + stats.stopTimer() + + db.logf("memdb@flush committed F·%d T·%v", len(rec.addedTables), stats.duration) + + for _, r := range rec.addedTables { + stats.write += r.size + } + db.compStats.addStat(flushLevel, stats) + + // Drop frozen memdb. + db.dropFrozenMem() + + // Resume table compaction. + if resumeC != nil { + select { + case <-resumeC: + close(resumeC) + case <-db.closeC: + db.compactionExitTransact() + } + } + + // Trigger table compaction. 
+ db.compTrigger(db.tcompCmdC) +} + +type tableCompactionBuilder struct { + db *DB + s *session + c *compaction + rec *sessionRecord + stat0, stat1 *cStatStaging + + snapHasLastUkey bool + snapLastUkey []byte + snapLastSeq uint64 + snapIter int + snapKerrCnt int + snapDropCnt int + + kerrCnt int + dropCnt int + + minSeq uint64 + strict bool + tableSize int + + tw *tWriter +} + +func (b *tableCompactionBuilder) appendKV(key, value []byte) error { + // Create new table if not already. + if b.tw == nil { + // Check for pause event. + if b.db != nil { + select { + case ch := <-b.db.tcompPauseC: + b.db.pauseCompaction(ch) + case <-b.db.closeC: + b.db.compactionExitTransact() + default: + } + } + + // Create new table. + var err error + b.tw, err = b.s.tops.create() + if err != nil { + return err + } + } + + // Write key/value into table. + return b.tw.append(key, value) +} + +func (b *tableCompactionBuilder) needFlush() bool { + return b.tw.tw.BytesLen() >= b.tableSize +} + +func (b *tableCompactionBuilder) flush() error { + t, err := b.tw.finish() + if err != nil { + return err + } + b.rec.addTableFile(b.c.sourceLevel+1, t) + b.stat1.write += t.size + b.s.logf("table@build created L%d@%d N·%d S·%s %q:%q", b.c.sourceLevel+1, t.fd.Num, b.tw.tw.EntriesLen(), shortenb(int(t.size)), t.imin, t.imax) + b.tw = nil + return nil +} + +func (b *tableCompactionBuilder) cleanup() { + if b.tw != nil { + b.tw.drop() + b.tw = nil + } +} + +func (b *tableCompactionBuilder) run(cnt *compactionTransactCounter) error { + snapResumed := b.snapIter > 0 + hasLastUkey := b.snapHasLastUkey // The key might has zero length, so this is necessary. + lastUkey := append([]byte{}, b.snapLastUkey...) + lastSeq := b.snapLastSeq + b.kerrCnt = b.snapKerrCnt + b.dropCnt = b.snapDropCnt + // Restore compaction state. 
+ b.c.restore() + + defer b.cleanup() + + b.stat1.startTimer() + defer b.stat1.stopTimer() + + iter := b.c.newIterator() + defer iter.Release() + for i := 0; iter.Next(); i++ { + // Incr transact counter. + cnt.incr() + + // Skip until last state. + if i < b.snapIter { + continue + } + + resumed := false + if snapResumed { + resumed = true + snapResumed = false + } + + ikey := iter.Key() + ukey, seq, kt, kerr := parseInternalKey(ikey) + + if kerr == nil { + shouldStop := !resumed && b.c.shouldStopBefore(ikey) + + if !hasLastUkey || b.s.icmp.uCompare(lastUkey, ukey) != 0 { + // First occurrence of this user key. + + // Only rotate tables if ukey doesn't hop across. + if b.tw != nil && (shouldStop || b.needFlush()) { + if err := b.flush(); err != nil { + return err + } + + // Creates snapshot of the state. + b.c.save() + b.snapHasLastUkey = hasLastUkey + b.snapLastUkey = append(b.snapLastUkey[:0], lastUkey...) + b.snapLastSeq = lastSeq + b.snapIter = i + b.snapKerrCnt = b.kerrCnt + b.snapDropCnt = b.dropCnt + } + + hasLastUkey = true + lastUkey = append(lastUkey[:0], ukey...) + lastSeq = keyMaxSeq + } + + switch { + case lastSeq <= b.minSeq: + // Dropped because newer entry for same user key exist + fallthrough // (A) + case kt == keyTypeDel && seq <= b.minSeq && b.c.baseLevelForKey(lastUkey): + // For this user key: + // (1) there is no data in higher levels + // (2) data in lower levels will have larger seq numbers + // (3) data in layers that are being compacted here and have + // smaller seq numbers will be dropped in the next + // few iterations of this loop (by rule (A) above). + // Therefore this deletion marker is obsolete and can be dropped. + lastSeq = seq + b.dropCnt++ + continue + default: + lastSeq = seq + } + } else { + if b.strict { + return kerr + } + + // Don't drop corrupted keys. 
+ hasLastUkey = false + lastUkey = lastUkey[:0] + lastSeq = keyMaxSeq + b.kerrCnt++ + } + + if err := b.appendKV(ikey, iter.Value()); err != nil { + return err + } + } + + if err := iter.Error(); err != nil { + return err + } + + // Finish last table. + if b.tw != nil && !b.tw.empty() { + return b.flush() + } + return nil +} + +func (b *tableCompactionBuilder) revert() error { + for _, at := range b.rec.addedTables { + b.s.logf("table@build revert @%d", at.num) + if err := b.s.stor.Remove(storage.FileDesc{Type: storage.TypeTable, Num: at.num}); err != nil { + return err + } + } + return nil +} + +func (db *DB) tableCompaction(c *compaction, noTrivial bool) { + defer c.release() + + rec := &sessionRecord{} + rec.addCompPtr(c.sourceLevel, c.imax) + + if !noTrivial && c.trivial() { + t := c.levels[0][0] + db.logf("table@move L%d@%d -> L%d", c.sourceLevel, t.fd.Num, c.sourceLevel+1) + rec.delTable(c.sourceLevel, t.fd.Num) + rec.addTableFile(c.sourceLevel+1, t) + db.compactionCommit("table-move", rec) + return + } + + var stats [2]cStatStaging + for i, tables := range c.levels { + for _, t := range tables { + stats[i].read += t.size + // Insert deleted tables into record + rec.delTable(c.sourceLevel+i, t.fd.Num) + } + } + sourceSize := int(stats[0].read + stats[1].read) + minSeq := db.minSeq() + db.logf("table@compaction L%d·%d -> L%d·%d S·%s Q·%d", c.sourceLevel, len(c.levels[0]), c.sourceLevel+1, len(c.levels[1]), shortenb(sourceSize), minSeq) + + b := &tableCompactionBuilder{ + db: db, + s: db.s, + c: c, + rec: rec, + stat1: &stats[1], + minSeq: minSeq, + strict: db.s.o.GetStrict(opt.StrictCompaction), + tableSize: db.s.o.GetCompactionTableSize(c.sourceLevel + 1), + } + db.compactionTransact("table@build", b) + + // Commit. 
+ stats[1].startTimer() + db.compactionCommit("table", rec) + stats[1].stopTimer() + + resultSize := int(stats[1].write) + db.logf("table@compaction committed F%s S%s Ke·%d D·%d T·%v", sint(len(rec.addedTables)-len(rec.deletedTables)), sshortenb(resultSize-sourceSize), b.kerrCnt, b.dropCnt, stats[1].duration) + + // Save compaction stats + for i := range stats { + db.compStats.addStat(c.sourceLevel+1, &stats[i]) + } +} + +func (db *DB) tableRangeCompaction(level int, umin, umax []byte) error { + db.logf("table@compaction range L%d %q:%q", level, umin, umax) + if level >= 0 { + if c := db.s.getCompactionRange(level, umin, umax, true); c != nil { + db.tableCompaction(c, true) + } + } else { + // Retry until nothing to compact. + for { + compacted := false + + // Scan for maximum level with overlapped tables. + v := db.s.version() + m := 1 + for i := m; i < len(v.levels); i++ { + tables := v.levels[i] + if tables.overlaps(db.s.icmp, umin, umax, false) { + m = i + } + } + v.release() + + for level := 0; level < m; level++ { + if c := db.s.getCompactionRange(level, umin, umax, false); c != nil { + db.tableCompaction(c, true) + compacted = true + } + } + + if !compacted { + break + } + } + } + + return nil +} + +func (db *DB) tableAutoCompaction() { + if c := db.s.pickCompaction(); c != nil { + db.tableCompaction(c, false) + } +} + +func (db *DB) tableNeedCompaction() bool { + v := db.s.version() + defer v.release() + return v.needCompaction() +} + +// resumeWrite returns an indicator whether we should resume write operation if enough level0 files are compacted. 
+func (db *DB) resumeWrite() bool { + v := db.s.version() + defer v.release() + if v.tLen(0) < db.s.o.GetWriteL0PauseTrigger() { + return true + } + return false +} + +func (db *DB) pauseCompaction(ch chan<- struct{}) { + select { + case ch <- struct{}{}: + case <-db.closeC: + db.compactionExitTransact() + } +} + +type cCmd interface { + ack(err error) +} + +type cAuto struct { + // Note for table compaction, an non-empty ackC represents it's a compaction waiting command. + ackC chan<- error +} + +func (r cAuto) ack(err error) { + if r.ackC != nil { + defer func() { + recover() + }() + r.ackC <- err + } +} + +type cRange struct { + level int + min, max []byte + ackC chan<- error +} + +func (r cRange) ack(err error) { + if r.ackC != nil { + defer func() { + recover() + }() + r.ackC <- err + } +} + +// This will trigger auto compaction but will not wait for it. +func (db *DB) compTrigger(compC chan<- cCmd) { + select { + case compC <- cAuto{}: + default: + } +} + +// This will trigger auto compaction and/or wait for all compaction to be done. +func (db *DB) compTriggerWait(compC chan<- cCmd) (err error) { + ch := make(chan error) + defer close(ch) + // Send cmd. + select { + case compC <- cAuto{ch}: + case err = <-db.compErrC: + return + case <-db.closeC: + return ErrClosed + } + // Wait cmd. + select { + case err = <-ch: + case err = <-db.compErrC: + case <-db.closeC: + return ErrClosed + } + return err +} + +// Send range compaction request. +func (db *DB) compTriggerRange(compC chan<- cCmd, level int, min, max []byte) (err error) { + ch := make(chan error) + defer close(ch) + // Send cmd. + select { + case compC <- cRange{level, min, max, ch}: + case err := <-db.compErrC: + return err + case <-db.closeC: + return ErrClosed + } + // Wait cmd. 
+ select { + case err = <-ch: + case err = <-db.compErrC: + case <-db.closeC: + return ErrClosed + } + return err +} + +func (db *DB) mCompaction() { + var x cCmd + + defer func() { + if x := recover(); x != nil { + if x != errCompactionTransactExiting { + panic(x) + } + } + if x != nil { + x.ack(ErrClosed) + } + db.closeW.Done() + }() + + for { + select { + case x = <-db.mcompCmdC: + switch x.(type) { + case cAuto: + db.memCompaction() + x.ack(nil) + x = nil + default: + panic("leveldb: unknown command") + } + case <-db.closeC: + return + } + } +} + +func (db *DB) tCompaction() { + var ( + x cCmd + waitQ []cCmd + ) + + defer func() { + if x := recover(); x != nil { + if x != errCompactionTransactExiting { + panic(x) + } + } + for i := range waitQ { + waitQ[i].ack(ErrClosed) + waitQ[i] = nil + } + if x != nil { + x.ack(ErrClosed) + } + db.closeW.Done() + }() + + for { + if db.tableNeedCompaction() { + select { + case x = <-db.tcompCmdC: + case ch := <-db.tcompPauseC: + db.pauseCompaction(ch) + continue + case <-db.closeC: + return + default: + } + // Resume write operation as soon as possible. + if len(waitQ) > 0 && db.resumeWrite() { + for i := range waitQ { + waitQ[i].ack(nil) + waitQ[i] = nil + } + waitQ = waitQ[:0] + } + } else { + for i := range waitQ { + waitQ[i].ack(nil) + waitQ[i] = nil + } + waitQ = waitQ[:0] + select { + case x = <-db.tcompCmdC: + case ch := <-db.tcompPauseC: + db.pauseCompaction(ch) + continue + case <-db.closeC: + return + } + } + if x != nil { + switch cmd := x.(type) { + case cAuto: + if cmd.ackC != nil { + // Check the write pause state before caching it. 
+ if db.resumeWrite() { + x.ack(nil) + } else { + waitQ = append(waitQ, x) + } + } + case cRange: + x.ack(db.tableRangeCompaction(cmd.level, cmd.min, cmd.max)) + default: + panic("leveldb: unknown command") + } + x = nil + } + db.tableAutoCompaction() + } +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/db_iter.go b/vendor/github.com/syndtr/goleveldb/leveldb/db_iter.go new file mode 100644 index 0000000000..03c24cdab5 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/db_iter.go @@ -0,0 +1,360 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "errors" + "math/rand" + "runtime" + "sync" + "sync/atomic" + + "github.com/syndtr/goleveldb/leveldb/iterator" + "github.com/syndtr/goleveldb/leveldb/opt" + "github.com/syndtr/goleveldb/leveldb/util" +) + +var ( + errInvalidInternalKey = errors.New("leveldb: Iterator: invalid internal key") +) + +type memdbReleaser struct { + once sync.Once + m *memDB +} + +func (mr *memdbReleaser) Release() { + mr.once.Do(func() { + mr.m.decref() + }) +} + +func (db *DB) newRawIterator(auxm *memDB, auxt tFiles, slice *util.Range, ro *opt.ReadOptions) iterator.Iterator { + strict := opt.GetStrict(db.s.o.Options, ro, opt.StrictReader) + em, fm := db.getMems() + v := db.s.version() + + tableIts := v.getIterators(slice, ro) + n := len(tableIts) + len(auxt) + 3 + its := make([]iterator.Iterator, 0, n) + + if auxm != nil { + ami := auxm.NewIterator(slice) + ami.SetReleaser(&memdbReleaser{m: auxm}) + its = append(its, ami) + } + for _, t := range auxt { + its = append(its, v.s.tops.newIterator(t, slice, ro)) + } + + emi := em.NewIterator(slice) + emi.SetReleaser(&memdbReleaser{m: em}) + its = append(its, emi) + if fm != nil { + fmi := fm.NewIterator(slice) + fmi.SetReleaser(&memdbReleaser{m: fm}) + its = append(its, fmi) + } + its = append(its, 
tableIts...) + mi := iterator.NewMergedIterator(its, db.s.icmp, strict) + mi.SetReleaser(&versionReleaser{v: v}) + return mi +} + +func (db *DB) newIterator(auxm *memDB, auxt tFiles, seq uint64, slice *util.Range, ro *opt.ReadOptions) *dbIter { + var islice *util.Range + if slice != nil { + islice = &util.Range{} + if slice.Start != nil { + islice.Start = makeInternalKey(nil, slice.Start, keyMaxSeq, keyTypeSeek) + } + if slice.Limit != nil { + islice.Limit = makeInternalKey(nil, slice.Limit, keyMaxSeq, keyTypeSeek) + } + } + rawIter := db.newRawIterator(auxm, auxt, islice, ro) + iter := &dbIter{ + db: db, + icmp: db.s.icmp, + iter: rawIter, + seq: seq, + strict: opt.GetStrict(db.s.o.Options, ro, opt.StrictReader), + key: make([]byte, 0), + value: make([]byte, 0), + } + atomic.AddInt32(&db.aliveIters, 1) + runtime.SetFinalizer(iter, (*dbIter).Release) + return iter +} + +func (db *DB) iterSamplingRate() int { + return rand.Intn(2 * db.s.o.GetIteratorSamplingRate()) +} + +type dir int + +const ( + dirReleased dir = iota - 1 + dirSOI + dirEOI + dirBackward + dirForward +) + +// dbIter represent an interator states over a database session. 
+type dbIter struct { + db *DB + icmp *iComparer + iter iterator.Iterator + seq uint64 + strict bool + + smaplingGap int + dir dir + key []byte + value []byte + err error + releaser util.Releaser +} + +func (i *dbIter) sampleSeek() { + ikey := i.iter.Key() + i.smaplingGap -= len(ikey) + len(i.iter.Value()) + for i.smaplingGap < 0 { + i.smaplingGap += i.db.iterSamplingRate() + i.db.sampleSeek(ikey) + } +} + +func (i *dbIter) setErr(err error) { + i.err = err + i.key = nil + i.value = nil +} + +func (i *dbIter) iterErr() { + if err := i.iter.Error(); err != nil { + i.setErr(err) + } +} + +func (i *dbIter) Valid() bool { + return i.err == nil && i.dir > dirEOI +} + +func (i *dbIter) First() bool { + if i.err != nil { + return false + } else if i.dir == dirReleased { + i.err = ErrIterReleased + return false + } + + if i.iter.First() { + i.dir = dirSOI + return i.next() + } + i.dir = dirEOI + i.iterErr() + return false +} + +func (i *dbIter) Last() bool { + if i.err != nil { + return false + } else if i.dir == dirReleased { + i.err = ErrIterReleased + return false + } + + if i.iter.Last() { + return i.prev() + } + i.dir = dirSOI + i.iterErr() + return false +} + +func (i *dbIter) Seek(key []byte) bool { + if i.err != nil { + return false + } else if i.dir == dirReleased { + i.err = ErrIterReleased + return false + } + + ikey := makeInternalKey(nil, key, i.seq, keyTypeSeek) + if i.iter.Seek(ikey) { + i.dir = dirSOI + return i.next() + } + i.dir = dirEOI + i.iterErr() + return false +} + +func (i *dbIter) next() bool { + for { + if ukey, seq, kt, kerr := parseInternalKey(i.iter.Key()); kerr == nil { + i.sampleSeek() + if seq <= i.seq { + switch kt { + case keyTypeDel: + // Skip deleted key. + i.key = append(i.key[:0], ukey...) + i.dir = dirForward + case keyTypeVal: + if i.dir == dirSOI || i.icmp.uCompare(ukey, i.key) > 0 { + i.key = append(i.key[:0], ukey...) + i.value = append(i.value[:0], i.iter.Value()...) 
+ i.dir = dirForward + return true + } + } + } + } else if i.strict { + i.setErr(kerr) + break + } + if !i.iter.Next() { + i.dir = dirEOI + i.iterErr() + break + } + } + return false +} + +func (i *dbIter) Next() bool { + if i.dir == dirEOI || i.err != nil { + return false + } else if i.dir == dirReleased { + i.err = ErrIterReleased + return false + } + + if !i.iter.Next() || (i.dir == dirBackward && !i.iter.Next()) { + i.dir = dirEOI + i.iterErr() + return false + } + return i.next() +} + +func (i *dbIter) prev() bool { + i.dir = dirBackward + del := true + if i.iter.Valid() { + for { + if ukey, seq, kt, kerr := parseInternalKey(i.iter.Key()); kerr == nil { + i.sampleSeek() + if seq <= i.seq { + if !del && i.icmp.uCompare(ukey, i.key) < 0 { + return true + } + del = (kt == keyTypeDel) + if !del { + i.key = append(i.key[:0], ukey...) + i.value = append(i.value[:0], i.iter.Value()...) + } + } + } else if i.strict { + i.setErr(kerr) + return false + } + if !i.iter.Prev() { + break + } + } + } + if del { + i.dir = dirSOI + i.iterErr() + return false + } + return true +} + +func (i *dbIter) Prev() bool { + if i.dir == dirSOI || i.err != nil { + return false + } else if i.dir == dirReleased { + i.err = ErrIterReleased + return false + } + + switch i.dir { + case dirEOI: + return i.Last() + case dirForward: + for i.iter.Prev() { + if ukey, _, _, kerr := parseInternalKey(i.iter.Key()); kerr == nil { + i.sampleSeek() + if i.icmp.uCompare(ukey, i.key) < 0 { + goto cont + } + } else if i.strict { + i.setErr(kerr) + return false + } + } + i.dir = dirSOI + i.iterErr() + return false + } + +cont: + return i.prev() +} + +func (i *dbIter) Key() []byte { + if i.err != nil || i.dir <= dirEOI { + return nil + } + return i.key +} + +func (i *dbIter) Value() []byte { + if i.err != nil || i.dir <= dirEOI { + return nil + } + return i.value +} + +func (i *dbIter) Release() { + if i.dir != dirReleased { + // Clear the finalizer. 
+ runtime.SetFinalizer(i, nil) + + if i.releaser != nil { + i.releaser.Release() + i.releaser = nil + } + + i.dir = dirReleased + i.key = nil + i.value = nil + i.iter.Release() + i.iter = nil + atomic.AddInt32(&i.db.aliveIters, -1) + i.db = nil + } +} + +func (i *dbIter) SetReleaser(releaser util.Releaser) { + if i.dir == dirReleased { + panic(util.ErrReleased) + } + if i.releaser != nil && releaser != nil { + panic(util.ErrHasReleaser) + } + i.releaser = releaser +} + +func (i *dbIter) Error() error { + return i.err +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/db_snapshot.go b/vendor/github.com/syndtr/goleveldb/leveldb/db_snapshot.go new file mode 100644 index 0000000000..2c69d2e531 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/db_snapshot.go @@ -0,0 +1,183 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "container/list" + "fmt" + "runtime" + "sync" + "sync/atomic" + + "github.com/syndtr/goleveldb/leveldb/iterator" + "github.com/syndtr/goleveldb/leveldb/opt" + "github.com/syndtr/goleveldb/leveldb/util" +) + +type snapshotElement struct { + seq uint64 + ref int + e *list.Element +} + +// Acquires a snapshot, based on latest sequence. +func (db *DB) acquireSnapshot() *snapshotElement { + db.snapsMu.Lock() + defer db.snapsMu.Unlock() + + seq := db.getSeq() + + if e := db.snapsList.Back(); e != nil { + se := e.Value.(*snapshotElement) + if se.seq == seq { + se.ref++ + return se + } else if seq < se.seq { + panic("leveldb: sequence number is not increasing") + } + } + se := &snapshotElement{seq: seq, ref: 1} + se.e = db.snapsList.PushBack(se) + return se +} + +// Releases given snapshot element. 
+func (db *DB) releaseSnapshot(se *snapshotElement) { + db.snapsMu.Lock() + defer db.snapsMu.Unlock() + + se.ref-- + if se.ref == 0 { + db.snapsList.Remove(se.e) + se.e = nil + } else if se.ref < 0 { + panic("leveldb: Snapshot: negative element reference") + } +} + +// Gets minimum sequence that not being snapshotted. +func (db *DB) minSeq() uint64 { + db.snapsMu.Lock() + defer db.snapsMu.Unlock() + + if e := db.snapsList.Front(); e != nil { + return e.Value.(*snapshotElement).seq + } + + return db.getSeq() +} + +// Snapshot is a DB snapshot. +type Snapshot struct { + db *DB + elem *snapshotElement + mu sync.RWMutex + released bool +} + +// Creates new snapshot object. +func (db *DB) newSnapshot() *Snapshot { + snap := &Snapshot{ + db: db, + elem: db.acquireSnapshot(), + } + atomic.AddInt32(&db.aliveSnaps, 1) + runtime.SetFinalizer(snap, (*Snapshot).Release) + return snap +} + +func (snap *Snapshot) String() string { + return fmt.Sprintf("leveldb.Snapshot{%d}", snap.elem.seq) +} + +// Get gets the value for the given key. It returns ErrNotFound if +// the DB does not contains the key. +// +// The caller should not modify the contents of the returned slice, but +// it is safe to modify the contents of the argument after Get returns. +func (snap *Snapshot) Get(key []byte, ro *opt.ReadOptions) (value []byte, err error) { + err = snap.db.ok() + if err != nil { + return + } + snap.mu.RLock() + defer snap.mu.RUnlock() + if snap.released { + err = ErrSnapshotReleased + return + } + return snap.db.get(nil, nil, key, snap.elem.seq, ro) +} + +// Has returns true if the DB does contains the given key. +// +// It is safe to modify the contents of the argument after Get returns. 
+func (snap *Snapshot) Has(key []byte, ro *opt.ReadOptions) (ret bool, err error) { + err = snap.db.ok() + if err != nil { + return + } + snap.mu.RLock() + defer snap.mu.RUnlock() + if snap.released { + err = ErrSnapshotReleased + return + } + return snap.db.has(nil, nil, key, snap.elem.seq, ro) +} + +// NewIterator returns an iterator for the snapshot of the underlying DB. +// The returned iterator is not safe for concurrent use, but it is safe to use +// multiple iterators concurrently, with each in a dedicated goroutine. +// It is also safe to use an iterator concurrently with modifying its +// underlying DB. The resultant key/value pairs are guaranteed to be +// consistent. +// +// Slice allows slicing the iterator to only contains keys in the given +// range. A nil Range.Start is treated as a key before all keys in the +// DB. And a nil Range.Limit is treated as a key after all keys in +// the DB. +// +// The iterator must be released after use, by calling Release method. +// Releasing the snapshot doesn't mean releasing the iterator too, the +// iterator would be still valid until released. +// +// Also read Iterator documentation of the leveldb/iterator package. +func (snap *Snapshot) NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator { + if err := snap.db.ok(); err != nil { + return iterator.NewEmptyIterator(err) + } + snap.mu.Lock() + defer snap.mu.Unlock() + if snap.released { + return iterator.NewEmptyIterator(ErrSnapshotReleased) + } + // Since iterator already hold version ref, it doesn't need to + // hold snapshot ref. + return snap.db.newIterator(nil, nil, snap.elem.seq, slice, ro) +} + +// Release releases the snapshot. This will not release any returned +// iterators, the iterators would still be valid until released or the +// underlying DB is closed. +// +// Other methods should not be called after the snapshot has been released. 
+func (snap *Snapshot) Release() { + snap.mu.Lock() + defer snap.mu.Unlock() + + if !snap.released { + // Clear the finalizer. + runtime.SetFinalizer(snap, nil) + + snap.released = true + snap.db.releaseSnapshot(snap.elem) + atomic.AddInt32(&snap.db.aliveSnaps, -1) + snap.db = nil + snap.elem = nil + } +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/db_state.go b/vendor/github.com/syndtr/goleveldb/leveldb/db_state.go new file mode 100644 index 0000000000..65e1c54bb4 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/db_state.go @@ -0,0 +1,239 @@ +// Copyright (c) 2013, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "errors" + "sync/atomic" + "time" + + "github.com/syndtr/goleveldb/leveldb/journal" + "github.com/syndtr/goleveldb/leveldb/memdb" + "github.com/syndtr/goleveldb/leveldb/storage" +) + +var ( + errHasFrozenMem = errors.New("has frozen mem") +) + +type memDB struct { + db *DB + *memdb.DB + ref int32 +} + +func (m *memDB) getref() int32 { + return atomic.LoadInt32(&m.ref) +} + +func (m *memDB) incref() { + atomic.AddInt32(&m.ref, 1) +} + +func (m *memDB) decref() { + if ref := atomic.AddInt32(&m.ref, -1); ref == 0 { + // Only put back memdb with std capacity. + if m.Capacity() == m.db.s.o.GetWriteBuffer() { + m.Reset() + m.db.mpoolPut(m.DB) + } + m.db = nil + m.DB = nil + } else if ref < 0 { + panic("negative memdb ref") + } +} + +// Get latest sequence number. +func (db *DB) getSeq() uint64 { + return atomic.LoadUint64(&db.seq) +} + +// Atomically adds delta to seq. +func (db *DB) addSeq(delta uint64) { + atomic.AddUint64(&db.seq, delta) +} + +func (db *DB) setSeq(seq uint64) { + atomic.StoreUint64(&db.seq, seq) +} + +func (db *DB) sampleSeek(ikey internalKey) { + v := db.s.version() + if v.sampleSeek(ikey) { + // Trigger table compaction. 
+ db.compTrigger(db.tcompCmdC) + } + v.release() +} + +func (db *DB) mpoolPut(mem *memdb.DB) { + if !db.isClosed() { + select { + case db.memPool <- mem: + default: + } + } +} + +func (db *DB) mpoolGet(n int) *memDB { + var mdb *memdb.DB + select { + case mdb = <-db.memPool: + default: + } + if mdb == nil || mdb.Capacity() < n { + mdb = memdb.New(db.s.icmp, maxInt(db.s.o.GetWriteBuffer(), n)) + } + return &memDB{ + db: db, + DB: mdb, + } +} + +func (db *DB) mpoolDrain() { + ticker := time.NewTicker(30 * time.Second) + for { + select { + case <-ticker.C: + select { + case <-db.memPool: + default: + } + case <-db.closeC: + ticker.Stop() + // Make sure the pool is drained. + select { + case <-db.memPool: + case <-time.After(time.Second): + } + close(db.memPool) + return + } + } +} + +// Create new memdb and froze the old one; need external synchronization. +// newMem only called synchronously by the writer. +func (db *DB) newMem(n int) (mem *memDB, err error) { + fd := storage.FileDesc{Type: storage.TypeJournal, Num: db.s.allocFileNum()} + w, err := db.s.stor.Create(fd) + if err != nil { + db.s.reuseFileNum(fd.Num) + return + } + + db.memMu.Lock() + defer db.memMu.Unlock() + + if db.frozenMem != nil { + return nil, errHasFrozenMem + } + + if db.journal == nil { + db.journal = journal.NewWriter(w) + } else { + db.journal.Reset(w) + db.journalWriter.Close() + db.frozenJournalFd = db.journalFd + } + db.journalWriter = w + db.journalFd = fd + db.frozenMem = db.mem + mem = db.mpoolGet(n) + mem.incref() // for self + mem.incref() // for caller + db.mem = mem + // The seq only incremented by the writer. And whoever called newMem + // should hold write lock, so no need additional synchronization here. + db.frozenSeq = db.seq + return +} + +// Get all memdbs. 
+func (db *DB) getMems() (e, f *memDB) { + db.memMu.RLock() + defer db.memMu.RUnlock() + if db.mem != nil { + db.mem.incref() + } else if !db.isClosed() { + panic("nil effective mem") + } + if db.frozenMem != nil { + db.frozenMem.incref() + } + return db.mem, db.frozenMem +} + +// Get effective memdb. +func (db *DB) getEffectiveMem() *memDB { + db.memMu.RLock() + defer db.memMu.RUnlock() + if db.mem != nil { + db.mem.incref() + } else if !db.isClosed() { + panic("nil effective mem") + } + return db.mem +} + +// Check whether we has frozen memdb. +func (db *DB) hasFrozenMem() bool { + db.memMu.RLock() + defer db.memMu.RUnlock() + return db.frozenMem != nil +} + +// Get frozen memdb. +func (db *DB) getFrozenMem() *memDB { + db.memMu.RLock() + defer db.memMu.RUnlock() + if db.frozenMem != nil { + db.frozenMem.incref() + } + return db.frozenMem +} + +// Drop frozen memdb; assume that frozen memdb isn't nil. +func (db *DB) dropFrozenMem() { + db.memMu.Lock() + if err := db.s.stor.Remove(db.frozenJournalFd); err != nil { + db.logf("journal@remove removing @%d %q", db.frozenJournalFd.Num, err) + } else { + db.logf("journal@remove removed @%d", db.frozenJournalFd.Num) + } + db.frozenJournalFd = storage.FileDesc{} + db.frozenMem.decref() + db.frozenMem = nil + db.memMu.Unlock() +} + +// Clear mems ptr; used by DB.Close(). +func (db *DB) clearMems() { + db.memMu.Lock() + db.mem = nil + db.frozenMem = nil + db.memMu.Unlock() +} + +// Set closed flag; return true if not already closed. +func (db *DB) setClosed() bool { + return atomic.CompareAndSwapUint32(&db.closed, 0, 1) +} + +// Check whether DB was closed. +func (db *DB) isClosed() bool { + return atomic.LoadUint32(&db.closed) != 0 +} + +// Check read ok status. 
+func (db *DB) ok() error { + if db.isClosed() { + return ErrClosed + } + return nil +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/db_transaction.go b/vendor/github.com/syndtr/goleveldb/leveldb/db_transaction.go new file mode 100644 index 0000000000..b8f7e7d21d --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/db_transaction.go @@ -0,0 +1,325 @@ +// Copyright (c) 2016, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "errors" + "sync" + "time" + + "github.com/syndtr/goleveldb/leveldb/iterator" + "github.com/syndtr/goleveldb/leveldb/opt" + "github.com/syndtr/goleveldb/leveldb/util" +) + +var errTransactionDone = errors.New("leveldb: transaction already closed") + +// Transaction is the transaction handle. +type Transaction struct { + db *DB + lk sync.RWMutex + seq uint64 + mem *memDB + tables tFiles + ikScratch []byte + rec sessionRecord + stats cStatStaging + closed bool +} + +// Get gets the value for the given key. It returns ErrNotFound if the +// DB does not contains the key. +// +// The returned slice is its own copy, it is safe to modify the contents +// of the returned slice. +// It is safe to modify the contents of the argument after Get returns. +func (tr *Transaction) Get(key []byte, ro *opt.ReadOptions) ([]byte, error) { + tr.lk.RLock() + defer tr.lk.RUnlock() + if tr.closed { + return nil, errTransactionDone + } + return tr.db.get(tr.mem.DB, tr.tables, key, tr.seq, ro) +} + +// Has returns true if the DB does contains the given key. +// +// It is safe to modify the contents of the argument after Has returns. 
+func (tr *Transaction) Has(key []byte, ro *opt.ReadOptions) (bool, error) { + tr.lk.RLock() + defer tr.lk.RUnlock() + if tr.closed { + return false, errTransactionDone + } + return tr.db.has(tr.mem.DB, tr.tables, key, tr.seq, ro) +} + +// NewIterator returns an iterator for the latest snapshot of the transaction. +// The returned iterator is not safe for concurrent use, but it is safe to use +// multiple iterators concurrently, with each in a dedicated goroutine. +// It is also safe to use an iterator concurrently while writes to the +// transaction. The resultant key/value pairs are guaranteed to be consistent. +// +// Slice allows slicing the iterator to only contains keys in the given +// range. A nil Range.Start is treated as a key before all keys in the +// DB. And a nil Range.Limit is treated as a key after all keys in +// the DB. +// +// The iterator must be released after use, by calling Release method. +// +// Also read Iterator documentation of the leveldb/iterator package. +func (tr *Transaction) NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator { + tr.lk.RLock() + defer tr.lk.RUnlock() + if tr.closed { + return iterator.NewEmptyIterator(errTransactionDone) + } + tr.mem.incref() + return tr.db.newIterator(tr.mem, tr.tables, tr.seq, slice, ro) +} + +func (tr *Transaction) flush() error { + // Flush memdb. 
+ if tr.mem.Len() != 0 { + tr.stats.startTimer() + iter := tr.mem.NewIterator(nil) + t, n, err := tr.db.s.tops.createFrom(iter) + iter.Release() + tr.stats.stopTimer() + if err != nil { + return err + } + if tr.mem.getref() == 1 { + tr.mem.Reset() + } else { + tr.mem.decref() + tr.mem = tr.db.mpoolGet(0) + tr.mem.incref() + } + tr.tables = append(tr.tables, t) + tr.rec.addTableFile(0, t) + tr.stats.write += t.size + tr.db.logf("transaction@flush created L0@%d N·%d S·%s %q:%q", t.fd.Num, n, shortenb(int(t.size)), t.imin, t.imax) + } + return nil +} + +func (tr *Transaction) put(kt keyType, key, value []byte) error { + tr.ikScratch = makeInternalKey(tr.ikScratch, key, tr.seq+1, kt) + if tr.mem.Free() < len(tr.ikScratch)+len(value) { + if err := tr.flush(); err != nil { + return err + } + } + if err := tr.mem.Put(tr.ikScratch, value); err != nil { + return err + } + tr.seq++ + return nil +} + +// Put sets the value for the given key. It overwrites any previous value +// for that key; a DB is not a multi-map. +// Please note that the transaction is not compacted until committed, so if you +// writes 10 same keys, then those 10 same keys are in the transaction. +// +// It is safe to modify the contents of the arguments after Put returns. +func (tr *Transaction) Put(key, value []byte, wo *opt.WriteOptions) error { + tr.lk.Lock() + defer tr.lk.Unlock() + if tr.closed { + return errTransactionDone + } + return tr.put(keyTypeVal, key, value) +} + +// Delete deletes the value for the given key. +// Please note that the transaction is not compacted until committed, so if you +// writes 10 same keys, then those 10 same keys are in the transaction. +// +// It is safe to modify the contents of the arguments after Delete returns. +func (tr *Transaction) Delete(key []byte, wo *opt.WriteOptions) error { + tr.lk.Lock() + defer tr.lk.Unlock() + if tr.closed { + return errTransactionDone + } + return tr.put(keyTypeDel, key, nil) +} + +// Write apply the given batch to the transaction. 
The batch will be applied +// sequentially. +// Please note that the transaction is not compacted until committed, so if you +// writes 10 same keys, then those 10 same keys are in the transaction. +// +// It is safe to modify the contents of the arguments after Write returns. +func (tr *Transaction) Write(b *Batch, wo *opt.WriteOptions) error { + if b == nil || b.Len() == 0 { + return nil + } + + tr.lk.Lock() + defer tr.lk.Unlock() + if tr.closed { + return errTransactionDone + } + return b.replayInternal(func(i int, kt keyType, k, v []byte) error { + return tr.put(kt, k, v) + }) +} + +func (tr *Transaction) setDone() { + tr.closed = true + tr.db.tr = nil + tr.mem.decref() + <-tr.db.writeLockC +} + +// Commit commits the transaction. If error is not nil, then the transaction is +// not committed, it can then either be retried or discarded. +// +// Other methods should not be called after transaction has been committed. +func (tr *Transaction) Commit() error { + if err := tr.db.ok(); err != nil { + return err + } + + tr.lk.Lock() + defer tr.lk.Unlock() + if tr.closed { + return errTransactionDone + } + if err := tr.flush(); err != nil { + // Return error, lets user decide either to retry or discard + // transaction. + return err + } + if len(tr.tables) != 0 { + // Committing transaction. + tr.rec.setSeqNum(tr.seq) + tr.db.compCommitLk.Lock() + tr.stats.startTimer() + var cerr error + for retry := 0; retry < 3; retry++ { + cerr = tr.db.s.commit(&tr.rec) + if cerr != nil { + tr.db.logf("transaction@commit error R·%d %q", retry, cerr) + select { + case <-time.After(time.Second): + case <-tr.db.closeC: + tr.db.logf("transaction@commit exiting") + tr.db.compCommitLk.Unlock() + return cerr + } + } else { + // Success. Set db.seq. + tr.db.setSeq(tr.seq) + break + } + } + tr.stats.stopTimer() + if cerr != nil { + // Return error, lets user decide either to retry or discard + // transaction. + return cerr + } + + // Update compaction stats. 
This is safe as long as we hold compCommitLk. + tr.db.compStats.addStat(0, &tr.stats) + + // Trigger table auto-compaction. + tr.db.compTrigger(tr.db.tcompCmdC) + tr.db.compCommitLk.Unlock() + + // Additionally, wait compaction when certain threshold reached. + // Ignore error, returns error only if transaction can't be committed. + tr.db.waitCompaction() + } + // Only mark as done if transaction committed successfully. + tr.setDone() + return nil +} + +func (tr *Transaction) discard() { + // Discard transaction. + for _, t := range tr.tables { + tr.db.logf("transaction@discard @%d", t.fd.Num) + if err1 := tr.db.s.stor.Remove(t.fd); err1 == nil { + tr.db.s.reuseFileNum(t.fd.Num) + } + } +} + +// Discard discards the transaction. +// +// Other methods should not be called after transaction has been discarded. +func (tr *Transaction) Discard() { + tr.lk.Lock() + if !tr.closed { + tr.discard() + tr.setDone() + } + tr.lk.Unlock() +} + +func (db *DB) waitCompaction() error { + if db.s.tLen(0) >= db.s.o.GetWriteL0PauseTrigger() { + return db.compTriggerWait(db.tcompCmdC) + } + return nil +} + +// OpenTransaction opens an atomic DB transaction. Only one transaction can be +// opened at a time. Subsequent call to Write and OpenTransaction will be blocked +// until in-flight transaction is committed or discarded. +// The returned transaction handle is safe for concurrent use. +// +// Transaction is expensive and can overwhelm compaction, especially if +// transaction size is small. Use with caution. +// +// The transaction must be closed once done, either by committing or discarding +// the transaction. +// Closing the DB will discard open transaction. +func (db *DB) OpenTransaction() (*Transaction, error) { + if err := db.ok(); err != nil { + return nil, err + } + + // The write happen synchronously. 
+ select { + case db.writeLockC <- struct{}{}: + case err := <-db.compPerErrC: + return nil, err + case <-db.closeC: + return nil, ErrClosed + } + + if db.tr != nil { + panic("leveldb: has open transaction") + } + + // Flush current memdb. + if db.mem != nil && db.mem.Len() != 0 { + if _, err := db.rotateMem(0, true); err != nil { + return nil, err + } + } + + // Wait compaction when certain threshold reached. + if err := db.waitCompaction(); err != nil { + return nil, err + } + + tr := &Transaction{ + db: db, + seq: db.seq, + mem: db.mpoolGet(0), + } + tr.mem.incref() + db.tr = tr + return tr, nil +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/db_util.go b/vendor/github.com/syndtr/goleveldb/leveldb/db_util.go new file mode 100644 index 0000000000..3f0654894b --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/db_util.go @@ -0,0 +1,102 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "github.com/syndtr/goleveldb/leveldb/errors" + "github.com/syndtr/goleveldb/leveldb/iterator" + "github.com/syndtr/goleveldb/leveldb/opt" + "github.com/syndtr/goleveldb/leveldb/storage" + "github.com/syndtr/goleveldb/leveldb/util" +) + +// Reader is the interface that wraps basic Get and NewIterator methods. +// This interface implemented by both DB and Snapshot. +type Reader interface { + Get(key []byte, ro *opt.ReadOptions) (value []byte, err error) + NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator +} + +// Sizes is list of size. +type Sizes []int64 + +// Sum returns sum of the sizes. +func (sizes Sizes) Sum() int64 { + var sum int64 + for _, size := range sizes { + sum += size + } + return sum +} + +// Logging. +func (db *DB) log(v ...interface{}) { db.s.log(v...) } +func (db *DB) logf(format string, v ...interface{}) { db.s.logf(format, v...) 
} + +// Check and clean files. +func (db *DB) checkAndCleanFiles() error { + v := db.s.version() + defer v.release() + + tmap := make(map[int64]bool) + for _, tables := range v.levels { + for _, t := range tables { + tmap[t.fd.Num] = false + } + } + + fds, err := db.s.stor.List(storage.TypeAll) + if err != nil { + return err + } + + var nt int + var rem []storage.FileDesc + for _, fd := range fds { + keep := true + switch fd.Type { + case storage.TypeManifest: + keep = fd.Num >= db.s.manifestFd.Num + case storage.TypeJournal: + if !db.frozenJournalFd.Zero() { + keep = fd.Num >= db.frozenJournalFd.Num + } else { + keep = fd.Num >= db.journalFd.Num + } + case storage.TypeTable: + _, keep = tmap[fd.Num] + if keep { + tmap[fd.Num] = true + nt++ + } + } + + if !keep { + rem = append(rem, fd) + } + } + + if nt != len(tmap) { + var mfds []storage.FileDesc + for num, present := range tmap { + if !present { + mfds = append(mfds, storage.FileDesc{Type: storage.TypeTable, Num: num}) + db.logf("db@janitor table missing @%d", num) + } + } + return errors.NewErrCorrupted(storage.FileDesc{}, &errors.ErrMissingFiles{Fds: mfds}) + } + + db.logf("db@janitor F·%d G·%d", len(fds), len(rem)) + for _, fd := range rem { + db.logf("db@janitor removing %s-%d", fd.Type, fd.Num) + if err := db.s.stor.Remove(fd); err != nil { + return err + } + } + return nil +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/db_write.go b/vendor/github.com/syndtr/goleveldb/leveldb/db_write.go new file mode 100644 index 0000000000..db0c1bece1 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/db_write.go @@ -0,0 +1,464 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +package leveldb + +import ( + "sync/atomic" + "time" + + "github.com/syndtr/goleveldb/leveldb/memdb" + "github.com/syndtr/goleveldb/leveldb/opt" + "github.com/syndtr/goleveldb/leveldb/util" +) + +func (db *DB) writeJournal(batches []*Batch, seq uint64, sync bool) error { + wr, err := db.journal.Next() + if err != nil { + return err + } + if err := writeBatchesWithHeader(wr, batches, seq); err != nil { + return err + } + if err := db.journal.Flush(); err != nil { + return err + } + if sync { + return db.journalWriter.Sync() + } + return nil +} + +func (db *DB) rotateMem(n int, wait bool) (mem *memDB, err error) { + retryLimit := 3 +retry: + // Wait for pending memdb compaction. + err = db.compTriggerWait(db.mcompCmdC) + if err != nil { + return + } + retryLimit-- + + // Create new memdb and journal. + mem, err = db.newMem(n) + if err != nil { + if err == errHasFrozenMem { + if retryLimit <= 0 { + panic("BUG: still has frozen memdb") + } + goto retry + } + return + } + + // Schedule memdb compaction. + if wait { + err = db.compTriggerWait(db.mcompCmdC) + } else { + db.compTrigger(db.mcompCmdC) + } + return +} + +func (db *DB) flush(n int) (mdb *memDB, mdbFree int, err error) { + delayed := false + slowdownTrigger := db.s.o.GetWriteL0SlowdownTrigger() + pauseTrigger := db.s.o.GetWriteL0PauseTrigger() + flush := func() (retry bool) { + mdb = db.getEffectiveMem() + if mdb == nil { + err = ErrClosed + return false + } + defer func() { + if retry { + mdb.decref() + mdb = nil + } + }() + tLen := db.s.tLen(0) + mdbFree = mdb.Free() + switch { + case tLen >= slowdownTrigger && !delayed: + delayed = true + time.Sleep(time.Millisecond) + case mdbFree >= n: + return false + case tLen >= pauseTrigger: + delayed = true + // Set the write paused flag explicitly. + atomic.StoreInt32(&db.inWritePaused, 1) + err = db.compTriggerWait(db.tcompCmdC) + // Unset the write paused flag. 
+ atomic.StoreInt32(&db.inWritePaused, 0) + if err != nil { + return false + } + default: + // Allow memdb to grow if it has no entry. + if mdb.Len() == 0 { + mdbFree = n + } else { + mdb.decref() + mdb, err = db.rotateMem(n, false) + if err == nil { + mdbFree = mdb.Free() + } else { + mdbFree = 0 + } + } + return false + } + return true + } + start := time.Now() + for flush() { + } + if delayed { + db.writeDelay += time.Since(start) + db.writeDelayN++ + } else if db.writeDelayN > 0 { + db.logf("db@write was delayed N·%d T·%v", db.writeDelayN, db.writeDelay) + atomic.AddInt32(&db.cWriteDelayN, int32(db.writeDelayN)) + atomic.AddInt64(&db.cWriteDelay, int64(db.writeDelay)) + db.writeDelay = 0 + db.writeDelayN = 0 + } + return +} + +type writeMerge struct { + sync bool + batch *Batch + keyType keyType + key, value []byte +} + +func (db *DB) unlockWrite(overflow bool, merged int, err error) { + for i := 0; i < merged; i++ { + db.writeAckC <- err + } + if overflow { + // Pass lock to the next write (that failed to merge). + db.writeMergedC <- false + } else { + // Release lock. + <-db.writeLockC + } +} + +// ourBatch is batch that we can modify. +func (db *DB) writeLocked(batch, ourBatch *Batch, merge, sync bool) error { + // Try to flush memdb. This method would also trying to throttle writes + // if it is too fast and compaction cannot catch-up. + mdb, mdbFree, err := db.flush(batch.internalLen) + if err != nil { + db.unlockWrite(false, 0, err) + return err + } + defer mdb.decref() + + var ( + overflow bool + merged int + batches = []*Batch{batch} + ) + + if merge { + // Merge limit. + var mergeLimit int + if batch.internalLen > 128<<10 { + mergeLimit = (1 << 20) - batch.internalLen + } else { + mergeLimit = 128 << 10 + } + mergeCap := mdbFree - batch.internalLen + if mergeLimit > mergeCap { + mergeLimit = mergeCap + } + + merge: + for mergeLimit > 0 { + select { + case incoming := <-db.writeMergeC: + if incoming.batch != nil { + // Merge batch. 
+ if incoming.batch.internalLen > mergeLimit { + overflow = true + break merge + } + batches = append(batches, incoming.batch) + mergeLimit -= incoming.batch.internalLen + } else { + // Merge put. + internalLen := len(incoming.key) + len(incoming.value) + 8 + if internalLen > mergeLimit { + overflow = true + break merge + } + if ourBatch == nil { + ourBatch = db.batchPool.Get().(*Batch) + ourBatch.Reset() + batches = append(batches, ourBatch) + } + // We can use same batch since concurrent write doesn't + // guarantee write order. + ourBatch.appendRec(incoming.keyType, incoming.key, incoming.value) + mergeLimit -= internalLen + } + sync = sync || incoming.sync + merged++ + db.writeMergedC <- true + + default: + break merge + } + } + } + + // Release ourBatch if any. + if ourBatch != nil { + defer db.batchPool.Put(ourBatch) + } + + // Seq number. + seq := db.seq + 1 + + // Write journal. + if err := db.writeJournal(batches, seq, sync); err != nil { + db.unlockWrite(overflow, merged, err) + return err + } + + // Put batches. + for _, batch := range batches { + if err := batch.putMem(seq, mdb.DB); err != nil { + panic(err) + } + seq += uint64(batch.Len()) + } + + // Incr seq number. + db.addSeq(uint64(batchesLen(batches))) + + // Rotate memdb if it's reach the threshold. + if batch.internalLen >= mdbFree { + db.rotateMem(0, false) + } + + db.unlockWrite(overflow, merged, nil) + return nil +} + +// Write apply the given batch to the DB. The batch records will be applied +// sequentially. Write might be used concurrently, when used concurrently and +// batch is small enough, write will try to merge the batches. Set NoWriteMerge +// option to true to disable write merge. +// +// It is safe to modify the contents of the arguments after Write returns but +// not before. Write will not modify content of the batch. 
+func (db *DB) Write(batch *Batch, wo *opt.WriteOptions) error { + if err := db.ok(); err != nil || batch == nil || batch.Len() == 0 { + return err + } + + // If the batch size is larger than write buffer, it may justified to write + // using transaction instead. Using transaction the batch will be written + // into tables directly, skipping the journaling. + if batch.internalLen > db.s.o.GetWriteBuffer() && !db.s.o.GetDisableLargeBatchTransaction() { + tr, err := db.OpenTransaction() + if err != nil { + return err + } + if err := tr.Write(batch, wo); err != nil { + tr.Discard() + return err + } + return tr.Commit() + } + + merge := !wo.GetNoWriteMerge() && !db.s.o.GetNoWriteMerge() + sync := wo.GetSync() && !db.s.o.GetNoSync() + + // Acquire write lock. + if merge { + select { + case db.writeMergeC <- writeMerge{sync: sync, batch: batch}: + if <-db.writeMergedC { + // Write is merged. + return <-db.writeAckC + } + // Write is not merged, the write lock is handed to us. Continue. + case db.writeLockC <- struct{}{}: + // Write lock acquired. + case err := <-db.compPerErrC: + // Compaction error. + return err + case <-db.closeC: + // Closed + return ErrClosed + } + } else { + select { + case db.writeLockC <- struct{}{}: + // Write lock acquired. + case err := <-db.compPerErrC: + // Compaction error. + return err + case <-db.closeC: + // Closed + return ErrClosed + } + } + + return db.writeLocked(batch, nil, merge, sync) +} + +func (db *DB) putRec(kt keyType, key, value []byte, wo *opt.WriteOptions) error { + if err := db.ok(); err != nil { + return err + } + + merge := !wo.GetNoWriteMerge() && !db.s.o.GetNoWriteMerge() + sync := wo.GetSync() && !db.s.o.GetNoSync() + + // Acquire write lock. + if merge { + select { + case db.writeMergeC <- writeMerge{sync: sync, keyType: kt, key: key, value: value}: + if <-db.writeMergedC { + // Write is merged. + return <-db.writeAckC + } + // Write is not merged, the write lock is handed to us. Continue. 
+ case db.writeLockC <- struct{}{}: + // Write lock acquired. + case err := <-db.compPerErrC: + // Compaction error. + return err + case <-db.closeC: + // Closed + return ErrClosed + } + } else { + select { + case db.writeLockC <- struct{}{}: + // Write lock acquired. + case err := <-db.compPerErrC: + // Compaction error. + return err + case <-db.closeC: + // Closed + return ErrClosed + } + } + + batch := db.batchPool.Get().(*Batch) + batch.Reset() + batch.appendRec(kt, key, value) + return db.writeLocked(batch, batch, merge, sync) +} + +// Put sets the value for the given key. It overwrites any previous value +// for that key; a DB is not a multi-map. Write merge also applies for Put, see +// Write. +// +// It is safe to modify the contents of the arguments after Put returns but not +// before. +func (db *DB) Put(key, value []byte, wo *opt.WriteOptions) error { + return db.putRec(keyTypeVal, key, value, wo) +} + +// Delete deletes the value for the given key. Delete will not returns error if +// key doesn't exist. Write merge also applies for Delete, see Write. +// +// It is safe to modify the contents of the arguments after Delete returns but +// not before. +func (db *DB) Delete(key []byte, wo *opt.WriteOptions) error { + return db.putRec(keyTypeDel, key, nil, wo) +} + +func isMemOverlaps(icmp *iComparer, mem *memdb.DB, min, max []byte) bool { + iter := mem.NewIterator(nil) + defer iter.Release() + return (max == nil || (iter.First() && icmp.uCompare(max, internalKey(iter.Key()).ukey()) >= 0)) && + (min == nil || (iter.Last() && icmp.uCompare(min, internalKey(iter.Key()).ukey()) <= 0)) +} + +// CompactRange compacts the underlying DB for the given key range. +// In particular, deleted and overwritten versions are discarded, +// and the data is rearranged to reduce the cost of operations +// needed to access the data. This operation should typically only +// be invoked by users who understand the underlying implementation. 
+// +// A nil Range.Start is treated as a key before all keys in the DB. +// And a nil Range.Limit is treated as a key after all keys in the DB. +// Therefore if both is nil then it will compact entire DB. +func (db *DB) CompactRange(r util.Range) error { + if err := db.ok(); err != nil { + return err + } + + // Lock writer. + select { + case db.writeLockC <- struct{}{}: + case err := <-db.compPerErrC: + return err + case <-db.closeC: + return ErrClosed + } + + // Check for overlaps in memdb. + mdb := db.getEffectiveMem() + if mdb == nil { + return ErrClosed + } + defer mdb.decref() + if isMemOverlaps(db.s.icmp, mdb.DB, r.Start, r.Limit) { + // Memdb compaction. + if _, err := db.rotateMem(0, false); err != nil { + <-db.writeLockC + return err + } + <-db.writeLockC + if err := db.compTriggerWait(db.mcompCmdC); err != nil { + return err + } + } else { + <-db.writeLockC + } + + // Table compaction. + return db.compTriggerRange(db.tcompCmdC, -1, r.Start, r.Limit) +} + +// SetReadOnly makes DB read-only. It will stay read-only until reopened. +func (db *DB) SetReadOnly() error { + if err := db.ok(); err != nil { + return err + } + + // Lock writer. + select { + case db.writeLockC <- struct{}{}: + db.compWriteLocking = true + case err := <-db.compPerErrC: + return err + case <-db.closeC: + return ErrClosed + } + + // Set compaction read-only. + select { + case db.compErrSetC <- ErrReadOnly: + case perr := <-db.compPerErrC: + return perr + case <-db.closeC: + return ErrClosed + } + + return nil +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/doc.go b/vendor/github.com/syndtr/goleveldb/leveldb/doc.go new file mode 100644 index 0000000000..be768e5739 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/doc.go @@ -0,0 +1,92 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +// Package leveldb provides implementation of LevelDB key/value database. +// +// Create or open a database: +// +// // The returned DB instance is safe for concurrent use. Which mean that all +// // DB's methods may be called concurrently from multiple goroutine. +// db, err := leveldb.OpenFile("path/to/db", nil) +// ... +// defer db.Close() +// ... +// +// Read or modify the database content: +// +// // Remember that the contents of the returned slice should not be modified. +// data, err := db.Get([]byte("key"), nil) +// ... +// err = db.Put([]byte("key"), []byte("value"), nil) +// ... +// err = db.Delete([]byte("key"), nil) +// ... +// +// Iterate over database content: +// +// iter := db.NewIterator(nil, nil) +// for iter.Next() { +// // Remember that the contents of the returned slice should not be modified, and +// // only valid until the next call to Next. +// key := iter.Key() +// value := iter.Value() +// ... +// } +// iter.Release() +// err = iter.Error() +// ... +// +// Iterate over subset of database content with a particular prefix: +// iter := db.NewIterator(util.BytesPrefix([]byte("foo-")), nil) +// for iter.Next() { +// // Use key/value. +// ... +// } +// iter.Release() +// err = iter.Error() +// ... +// +// Seek-then-Iterate: +// +// iter := db.NewIterator(nil, nil) +// for ok := iter.Seek(key); ok; ok = iter.Next() { +// // Use key/value. +// ... +// } +// iter.Release() +// err = iter.Error() +// ... +// +// Iterate over subset of database content: +// +// iter := db.NewIterator(&util.Range{Start: []byte("foo"), Limit: []byte("xoo")}, nil) +// for iter.Next() { +// // Use key/value. +// ... +// } +// iter.Release() +// err = iter.Error() +// ... +// +// Batch writes: +// +// batch := new(leveldb.Batch) +// batch.Put([]byte("foo"), []byte("value")) +// batch.Put([]byte("bar"), []byte("another value")) +// batch.Delete([]byte("baz")) +// err = db.Write(batch, nil) +// ... 
+// +// Use bloom filter: +// +// o := &opt.Options{ +// Filter: filter.NewBloomFilter(10), +// } +// db, err := leveldb.OpenFile("path/to/db", o) +// ... +// defer db.Close() +// ... +package leveldb diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/errors.go b/vendor/github.com/syndtr/goleveldb/leveldb/errors.go new file mode 100644 index 0000000000..de2649812c --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/errors.go @@ -0,0 +1,20 @@ +// Copyright (c) 2014, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "github.com/syndtr/goleveldb/leveldb/errors" +) + +// Common errors. +var ( + ErrNotFound = errors.ErrNotFound + ErrReadOnly = errors.New("leveldb: read-only mode") + ErrSnapshotReleased = errors.New("leveldb: snapshot released") + ErrIterReleased = errors.New("leveldb: iterator released") + ErrClosed = errors.New("leveldb: closed") +) diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/errors/errors.go b/vendor/github.com/syndtr/goleveldb/leveldb/errors/errors.go new file mode 100644 index 0000000000..8d6146b6f5 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/errors/errors.go @@ -0,0 +1,78 @@ +// Copyright (c) 2014, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Package errors provides common error types used throughout leveldb. +package errors + +import ( + "errors" + "fmt" + + "github.com/syndtr/goleveldb/leveldb/storage" + "github.com/syndtr/goleveldb/leveldb/util" +) + +// Common errors. +var ( + ErrNotFound = New("leveldb: not found") + ErrReleased = util.ErrReleased + ErrHasReleaser = util.ErrHasReleaser +) + +// New returns an error that formats as the given text. 
+func New(text string) error { + return errors.New(text) +} + +// ErrCorrupted is the type that wraps errors that indicate corruption in +// the database. +type ErrCorrupted struct { + Fd storage.FileDesc + Err error +} + +func (e *ErrCorrupted) Error() string { + if !e.Fd.Zero() { + return fmt.Sprintf("%v [file=%v]", e.Err, e.Fd) + } + return e.Err.Error() +} + +// NewErrCorrupted creates new ErrCorrupted error. +func NewErrCorrupted(fd storage.FileDesc, err error) error { + return &ErrCorrupted{fd, err} +} + +// IsCorrupted returns a boolean indicating whether the error is indicating +// a corruption. +func IsCorrupted(err error) bool { + switch err.(type) { + case *ErrCorrupted: + return true + case *storage.ErrCorrupted: + return true + } + return false +} + +// ErrMissingFiles is the type that indicating a corruption due to missing +// files. ErrMissingFiles always wrapped with ErrCorrupted. +type ErrMissingFiles struct { + Fds []storage.FileDesc +} + +func (e *ErrMissingFiles) Error() string { return "file missing" } + +// SetFd sets 'file info' of the given error with the given file. +// Currently only ErrCorrupted is supported, otherwise will do nothing. +func SetFd(err error, fd storage.FileDesc) error { + switch x := err.(type) { + case *ErrCorrupted: + x.Fd = fd + return x + } + return err +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/filter.go b/vendor/github.com/syndtr/goleveldb/leveldb/filter.go new file mode 100644 index 0000000000..e961e420d3 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/filter.go @@ -0,0 +1,31 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +package leveldb + +import ( + "github.com/syndtr/goleveldb/leveldb/filter" +) + +type iFilter struct { + filter.Filter +} + +func (f iFilter) Contains(filter, key []byte) bool { + return f.Filter.Contains(filter, internalKey(key).ukey()) +} + +func (f iFilter) NewGenerator() filter.FilterGenerator { + return iFilterGenerator{f.Filter.NewGenerator()} +} + +type iFilterGenerator struct { + filter.FilterGenerator +} + +func (g iFilterGenerator) Add(key []byte) { + g.FilterGenerator.Add(internalKey(key).ukey()) +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/filter/bloom.go b/vendor/github.com/syndtr/goleveldb/leveldb/filter/bloom.go new file mode 100644 index 0000000000..bab0e99705 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/filter/bloom.go @@ -0,0 +1,116 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package filter + +import ( + "github.com/syndtr/goleveldb/leveldb/util" +) + +func bloomHash(key []byte) uint32 { + return util.Hash(key, 0xbc9f1d34) +} + +type bloomFilter int + +// The bloom filter serializes its parameters and is backward compatible +// with respect to them. Therefor, its parameters are not added to its +// name. +func (bloomFilter) Name() string { + return "leveldb.BuiltinBloomFilter" +} + +func (f bloomFilter) Contains(filter, key []byte) bool { + nBytes := len(filter) - 1 + if nBytes < 1 { + return false + } + nBits := uint32(nBytes * 8) + + // Use the encoded k so that we can read filters generated by + // bloom filters created using different parameters. + k := filter[nBytes] + if k > 30 { + // Reserved for potentially new encodings for short bloom filters. + // Consider it a match. 
+ return true + } + + kh := bloomHash(key) + delta := (kh >> 17) | (kh << 15) // Rotate right 17 bits + for j := uint8(0); j < k; j++ { + bitpos := kh % nBits + if (uint32(filter[bitpos/8]) & (1 << (bitpos % 8))) == 0 { + return false + } + kh += delta + } + return true +} + +func (f bloomFilter) NewGenerator() FilterGenerator { + // Round down to reduce probing cost a little bit; clamp to [1, 30] as an int before narrowing to uint8 so a large bitsPerKey (>= 372) cannot wrap around. + k := int(f) * 69 / 100 // 0.69 =~ ln(2) + if k < 1 { + k = 1 + } else if k > 30 { + k = 30 + } + return &bloomFilterGenerator{ + n: int(f), + k: uint8(k), + } +} + +type bloomFilterGenerator struct { + n int + k uint8 + + keyHashes []uint32 +} + +func (g *bloomFilterGenerator) Add(key []byte) { + // Use double-hashing to generate a sequence of hash values. + // See analysis in [Kirsch,Mitzenmacher 2006]. + g.keyHashes = append(g.keyHashes, bloomHash(key)) +} + +func (g *bloomFilterGenerator) Generate(b Buffer) { + // Compute bloom filter size (in both bits and bytes) + nBits := uint32(len(g.keyHashes) * g.n) + // For small n, we can see a very high false positive rate. Fix it + // by enforcing a minimum bloom filter length. + if nBits < 64 { + nBits = 64 + } + nBytes := (nBits + 7) / 8 + nBits = nBytes * 8 + + dest := b.Alloc(int(nBytes) + 1) + dest[nBytes] = g.k + for _, kh := range g.keyHashes { + delta := (kh >> 17) | (kh << 15) // Rotate right 17 bits + for j := uint8(0); j < g.k; j++ { + bitpos := kh % nBits + dest[bitpos/8] |= (1 << (bitpos % 8)) + kh += delta + } + } + + g.keyHashes = g.keyHashes[:0] +} + +// NewBloomFilter creates a new initialized bloom filter for given +// bitsPerKey. +// +// Since bitsPerKey is persisted individually for each bloom filter +// serialization, bloom filters are backwards compatible with respect to +// changing bitsPerKey. This means that no big performance penalty will +// be experienced when changing the parameter. See documentation for +// opt.Options.Filter for more information.
+func NewBloomFilter(bitsPerKey int) Filter { + return bloomFilter(bitsPerKey) +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/filter/filter.go b/vendor/github.com/syndtr/goleveldb/leveldb/filter/filter.go new file mode 100644 index 0000000000..7a925c5a86 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/filter/filter.go @@ -0,0 +1,60 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Package filter provides the interface and an implementation of a probabilistic +// data structure. +// +// The filter is responsible for creating a small filter from a set of keys. +// This filter is then used to test whether a key is a member of the set. +// In many cases, a filter can cut down the number of disk seeks from a +// handful to a single disk seek per DB.Get call. +package filter + +// Buffer is the interface that wraps the basic Alloc, Write and WriteByte methods. +type Buffer interface { + // Alloc allocates a slice of n bytes from the buffer. This also advances the + // write offset. + Alloc(n int) []byte + + // Write appends the contents of p to the buffer. + Write(p []byte) (n int, err error) + + // WriteByte appends the byte c to the buffer. + WriteByte(c byte) error +} + +// Filter is the interface of a filter policy: it names itself, creates generators and tests membership. +type Filter interface { + // Name returns the name of this policy. + // + // Note that if the filter encoding changes in an incompatible way, + // the name returned by this method must be changed. Otherwise, old + // incompatible filters may be passed to methods of this type. + Name() string + + // NewGenerator creates a new filter generator. + NewGenerator() FilterGenerator + + // Contains returns true if the filter contains the given key. + // + // The filter argument is a filter generated by the filter generator. + Contains(filter, key []byte) bool +} + +// FilterGenerator is the filter generator.
+type FilterGenerator interface { + // Add adds a key to the filter generator. + // + // The key may become invalid after the call to this method ends, therefore the + // key must be copied if the implementation requires keeping it for later + // use. The key should not be modified directly, doing so may cause + // undefined results. + Add(key []byte) + + // Generate generates filters based on keys passed so far. After a call + // to Generate the filter generator may be reset, depending on the implementation. + Generate(b Buffer) +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/iterator/array_iter.go b/vendor/github.com/syndtr/goleveldb/leveldb/iterator/array_iter.go new file mode 100644 index 0000000000..a23ab05f70 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/iterator/array_iter.go @@ -0,0 +1,184 @@ +// Copyright (c) 2014, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package iterator + +import ( + "github.com/syndtr/goleveldb/leveldb/util" +) + +// BasicArray is the interface that wraps the basic Len and Search methods. +type BasicArray interface { + // Len returns the length of the array. + Len() int + + // Search finds the smallest index that points to a key that is greater + // than or equal to the given key. + Search(key []byte) int +} + +// Array is the interface that wraps BasicArray and the basic Index method. +type Array interface { + BasicArray + + // Index returns the key/value pair at index i. + Index(i int) (key, value []byte) +} + +// ArrayIndexer is the interface that wraps BasicArray and the basic Get method. +type ArrayIndexer interface { + BasicArray + + // Get returns a new data iterator with index of i.
+ Get(i int) Iterator +} + +type basicArrayIterator struct { + util.BasicReleaser + array BasicArray + pos int + err error +} + +func (i *basicArrayIterator) Valid() bool { + return i.pos >= 0 && i.pos < i.array.Len() && !i.Released() +} + +func (i *basicArrayIterator) First() bool { + if i.Released() { + i.err = ErrIterReleased + return false + } + + if i.array.Len() == 0 { + i.pos = -1 + return false + } + i.pos = 0 + return true +} + +func (i *basicArrayIterator) Last() bool { + if i.Released() { + i.err = ErrIterReleased + return false + } + + n := i.array.Len() + if n == 0 { + i.pos = 0 + return false + } + i.pos = n - 1 + return true +} + +func (i *basicArrayIterator) Seek(key []byte) bool { + if i.Released() { + i.err = ErrIterReleased + return false + } + + n := i.array.Len() + if n == 0 { + i.pos = 0 + return false + } + i.pos = i.array.Search(key) + if i.pos >= n { + return false + } + return true +} + +func (i *basicArrayIterator) Next() bool { + if i.Released() { + i.err = ErrIterReleased + return false + } + + i.pos++ + if n := i.array.Len(); i.pos >= n { + i.pos = n + return false + } + return true +} + +func (i *basicArrayIterator) Prev() bool { + if i.Released() { + i.err = ErrIterReleased + return false + } + + i.pos-- + if i.pos < 0 { + i.pos = -1 + return false + } + return true +} + +func (i *basicArrayIterator) Error() error { return i.err } + +type arrayIterator struct { + basicArrayIterator + array Array + pos int + key, value []byte +} + +func (i *arrayIterator) updateKV() { + if i.pos == i.basicArrayIterator.pos { + return + } + i.pos = i.basicArrayIterator.pos + if i.Valid() { + i.key, i.value = i.array.Index(i.pos) + } else { + i.key = nil + i.value = nil + } +} + +func (i *arrayIterator) Key() []byte { + i.updateKV() + return i.key +} + +func (i *arrayIterator) Value() []byte { + i.updateKV() + return i.value +} + +type arrayIteratorIndexer struct { + basicArrayIterator + array ArrayIndexer +} + +func (i *arrayIteratorIndexer) Get() 
Iterator { + if i.Valid() { + return i.array.Get(i.basicArrayIterator.pos) + } + return nil +} + +// NewArrayIterator returns an iterator from the given array. +func NewArrayIterator(array Array) Iterator { + return &arrayIterator{ + basicArrayIterator: basicArrayIterator{array: array, pos: -1}, + array: array, + pos: -1, + } +} + +// NewArrayIndexer returns an index iterator from the given array. +func NewArrayIndexer(array ArrayIndexer) IteratorIndexer { + return &arrayIteratorIndexer{ + basicArrayIterator: basicArrayIterator{array: array, pos: -1}, + array: array, + } +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/iterator/indexed_iter.go b/vendor/github.com/syndtr/goleveldb/leveldb/iterator/indexed_iter.go new file mode 100644 index 0000000000..939adbb933 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/iterator/indexed_iter.go @@ -0,0 +1,242 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package iterator + +import ( + "github.com/syndtr/goleveldb/leveldb/errors" + "github.com/syndtr/goleveldb/leveldb/util" +) + +// IteratorIndexer is the interface that wraps CommonIterator and basic Get +// method. IteratorIndexer provides index for indexed iterator. +type IteratorIndexer interface { + CommonIterator + + // Get returns a new data iterator for the current position, or nil if + // done. 
+ Get() Iterator +} + +type indexedIterator struct { + util.BasicReleaser + index IteratorIndexer + strict bool + + data Iterator + err error + errf func(err error) + closed bool +} + +func (i *indexedIterator) setData() { + if i.data != nil { + i.data.Release() + } + i.data = i.index.Get() +} + +func (i *indexedIterator) clearData() { + if i.data != nil { + i.data.Release() + } + i.data = nil +} + +func (i *indexedIterator) indexErr() { + if err := i.index.Error(); err != nil { + if i.errf != nil { + i.errf(err) + } + i.err = err + } +} + +func (i *indexedIterator) dataErr() bool { + if err := i.data.Error(); err != nil { + if i.errf != nil { + i.errf(err) + } + if i.strict || !errors.IsCorrupted(err) { + i.err = err + return true + } + } + return false +} + +func (i *indexedIterator) Valid() bool { + return i.data != nil && i.data.Valid() +} + +func (i *indexedIterator) First() bool { + if i.err != nil { + return false + } else if i.Released() { + i.err = ErrIterReleased + return false + } + + if !i.index.First() { + i.indexErr() + i.clearData() + return false + } + i.setData() + return i.Next() +} + +func (i *indexedIterator) Last() bool { + if i.err != nil { + return false + } else if i.Released() { + i.err = ErrIterReleased + return false + } + + if !i.index.Last() { + i.indexErr() + i.clearData() + return false + } + i.setData() + if !i.data.Last() { + if i.dataErr() { + return false + } + i.clearData() + return i.Prev() + } + return true +} + +func (i *indexedIterator) Seek(key []byte) bool { + if i.err != nil { + return false + } else if i.Released() { + i.err = ErrIterReleased + return false + } + + if !i.index.Seek(key) { + i.indexErr() + i.clearData() + return false + } + i.setData() + if !i.data.Seek(key) { + if i.dataErr() { + return false + } + i.clearData() + return i.Next() + } + return true +} + +func (i *indexedIterator) Next() bool { + if i.err != nil { + return false + } else if i.Released() { + i.err = ErrIterReleased + return false + } + + 
switch { + case i.data != nil && !i.data.Next(): + if i.dataErr() { + return false + } + i.clearData() + fallthrough + case i.data == nil: + if !i.index.Next() { + i.indexErr() + return false + } + i.setData() + return i.Next() + } + return true +} + +func (i *indexedIterator) Prev() bool { + if i.err != nil { + return false + } else if i.Released() { + i.err = ErrIterReleased + return false + } + + switch { + case i.data != nil && !i.data.Prev(): + if i.dataErr() { + return false + } + i.clearData() + fallthrough + case i.data == nil: + if !i.index.Prev() { + i.indexErr() + return false + } + i.setData() + if !i.data.Last() { + if i.dataErr() { + return false + } + i.clearData() + return i.Prev() + } + } + return true +} + +func (i *indexedIterator) Key() []byte { + if i.data == nil { + return nil + } + return i.data.Key() +} + +func (i *indexedIterator) Value() []byte { + if i.data == nil { + return nil + } + return i.data.Value() +} + +func (i *indexedIterator) Release() { + i.clearData() + i.index.Release() + i.BasicReleaser.Release() +} + +func (i *indexedIterator) Error() error { + if i.err != nil { + return i.err + } + if err := i.index.Error(); err != nil { + return err + } + return nil +} + +func (i *indexedIterator) SetErrorCallback(f func(err error)) { + i.errf = f +} + +// NewIndexedIterator returns an 'indexed iterator'. An index is iterator +// that returns another iterator, a 'data iterator'. A 'data iterator' is the +// iterator that contains actual key/value pairs. +// +// If strict is true the any 'corruption errors' (i.e errors.IsCorrupted(err) == true) +// won't be ignored and will halt 'indexed iterator', otherwise the iterator will +// continue to the next 'data iterator'. Corruption on 'index iterator' will not be +// ignored and will halt the iterator. 
+func NewIndexedIterator(index IteratorIndexer, strict bool) Iterator { + return &indexedIterator{index: index, strict: strict} +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/iterator/iter.go b/vendor/github.com/syndtr/goleveldb/leveldb/iterator/iter.go new file mode 100644 index 0000000000..96fb0f6859 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/iterator/iter.go @@ -0,0 +1,132 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Package iterator provides interface and implementation to traverse over +// contents of a database. +package iterator + +import ( + "errors" + + "github.com/syndtr/goleveldb/leveldb/util" +) + +var ( + ErrIterReleased = errors.New("leveldb/iterator: iterator released") +) + +// IteratorSeeker is the interface that wraps the 'seeks method'. +type IteratorSeeker interface { + // First moves the iterator to the first key/value pair. If the iterator + // only contains one key/value pair then First and Last would moves + // to the same key/value pair. + // It returns whether such pair exist. + First() bool + + // Last moves the iterator to the last key/value pair. If the iterator + // only contains one key/value pair then First and Last would moves + // to the same key/value pair. + // It returns whether such pair exist. + Last() bool + + // Seek moves the iterator to the first key/value pair whose key is greater + // than or equal to the given key. + // It returns whether such pair exist. + // + // It is safe to modify the contents of the argument after Seek returns. + Seek(key []byte) bool + + // Next moves the iterator to the next key/value pair. + // It returns false if the iterator is exhausted. + Next() bool + + // Prev moves the iterator to the previous key/value pair. + // It returns false if the iterator is exhausted. 
+ Prev() bool +} + +// CommonIterator is the interface that wraps common iterator methods. +type CommonIterator interface { + IteratorSeeker + + // util.Releaser is the interface that wraps basic Release method. + // When called Release will releases any resources associated with the + // iterator. + util.Releaser + + // util.ReleaseSetter is the interface that wraps the basic SetReleaser + // method. + util.ReleaseSetter + + // TODO: Remove this when ready. + Valid() bool + + // Error returns any accumulated error. Exhausting all the key/value pairs + // is not considered to be an error. + Error() error +} + +// Iterator iterates over a DB's key/value pairs in key order. +// +// When encounter an error any 'seeks method' will return false and will +// yield no key/value pairs. The error can be queried by calling the Error +// method. Calling Release is still necessary. +// +// An iterator must be released after use, but it is not necessary to read +// an iterator until exhaustion. +// Also, an iterator is not necessarily safe for concurrent use, but it is +// safe to use multiple iterators concurrently, with each in a dedicated +// goroutine. +type Iterator interface { + CommonIterator + + // Key returns the key of the current key/value pair, or nil if done. + // The caller should not modify the contents of the returned slice, and + // its contents may change on the next call to any 'seeks method'. + Key() []byte + + // Value returns the value of the current key/value pair, or nil if done. + // The caller should not modify the contents of the returned slice, and + // its contents may change on the next call to any 'seeks method'. + Value() []byte +} + +// ErrorCallbackSetter is the interface that wraps basic SetErrorCallback +// method. +// +// ErrorCallbackSetter implemented by indexed and merged iterator. +type ErrorCallbackSetter interface { + // SetErrorCallback allows set an error callback of the corresponding + // iterator. Use nil to clear the callback. 
+ SetErrorCallback(f func(err error)) +} + +type emptyIterator struct { + util.BasicReleaser + err error +} + +func (i *emptyIterator) rErr() { + if i.err == nil && i.Released() { + i.err = ErrIterReleased + } +} + +func (*emptyIterator) Valid() bool { return false } +func (i *emptyIterator) First() bool { i.rErr(); return false } +func (i *emptyIterator) Last() bool { i.rErr(); return false } +func (i *emptyIterator) Seek(key []byte) bool { i.rErr(); return false } +func (i *emptyIterator) Next() bool { i.rErr(); return false } +func (i *emptyIterator) Prev() bool { i.rErr(); return false } +func (*emptyIterator) Key() []byte { return nil } +func (*emptyIterator) Value() []byte { return nil } +func (i *emptyIterator) Error() error { return i.err } + +// NewEmptyIterator creates an empty iterator. The err parameter can be +// nil, but if not nil the given err will be returned by Error method. +func NewEmptyIterator(err error) Iterator { + return &emptyIterator{err: err} +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/iterator/merged_iter.go b/vendor/github.com/syndtr/goleveldb/leveldb/iterator/merged_iter.go new file mode 100644 index 0000000000..1a7e29df8f --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/iterator/merged_iter.go @@ -0,0 +1,304 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +package iterator + +import ( + "github.com/syndtr/goleveldb/leveldb/comparer" + "github.com/syndtr/goleveldb/leveldb/errors" + "github.com/syndtr/goleveldb/leveldb/util" +) + +type dir int + +const ( + dirReleased dir = iota - 1 + dirSOI + dirEOI + dirBackward + dirForward +) + +type mergedIterator struct { + cmp comparer.Comparer + iters []Iterator + strict bool + + keys [][]byte + index int + dir dir + err error + errf func(err error) + releaser util.Releaser +} + +func assertKey(key []byte) []byte { + if key == nil { + panic("leveldb/iterator: nil key") + } + return key +} + +func (i *mergedIterator) iterErr(iter Iterator) bool { + if err := iter.Error(); err != nil { + if i.errf != nil { + i.errf(err) + } + if i.strict || !errors.IsCorrupted(err) { + i.err = err + return true + } + } + return false +} + +func (i *mergedIterator) Valid() bool { + return i.err == nil && i.dir > dirEOI +} + +func (i *mergedIterator) First() bool { + if i.err != nil { + return false + } else if i.dir == dirReleased { + i.err = ErrIterReleased + return false + } + + for x, iter := range i.iters { + switch { + case iter.First(): + i.keys[x] = assertKey(iter.Key()) + case i.iterErr(iter): + return false + default: + i.keys[x] = nil + } + } + i.dir = dirSOI + return i.next() +} + +func (i *mergedIterator) Last() bool { + if i.err != nil { + return false + } else if i.dir == dirReleased { + i.err = ErrIterReleased + return false + } + + for x, iter := range i.iters { + switch { + case iter.Last(): + i.keys[x] = assertKey(iter.Key()) + case i.iterErr(iter): + return false + default: + i.keys[x] = nil + } + } + i.dir = dirEOI + return i.prev() +} + +func (i *mergedIterator) Seek(key []byte) bool { + if i.err != nil { + return false + } else if i.dir == dirReleased { + i.err = ErrIterReleased + return false + } + + for x, iter := range i.iters { + switch { + case iter.Seek(key): + i.keys[x] = assertKey(iter.Key()) + case i.iterErr(iter): + return false + default: + i.keys[x] = nil + } + 
} + i.dir = dirSOI + return i.next() +} + +func (i *mergedIterator) next() bool { + var key []byte + if i.dir == dirForward { + key = i.keys[i.index] + } + for x, tkey := range i.keys { + if tkey != nil && (key == nil || i.cmp.Compare(tkey, key) < 0) { + key = tkey + i.index = x + } + } + if key == nil { + i.dir = dirEOI + return false + } + i.dir = dirForward + return true +} + +func (i *mergedIterator) Next() bool { + if i.dir == dirEOI || i.err != nil { + return false + } else if i.dir == dirReleased { + i.err = ErrIterReleased + return false + } + + switch i.dir { + case dirSOI: + return i.First() + case dirBackward: + key := append([]byte{}, i.keys[i.index]...) + if !i.Seek(key) { + return false + } + return i.Next() + } + + x := i.index + iter := i.iters[x] + switch { + case iter.Next(): + i.keys[x] = assertKey(iter.Key()) + case i.iterErr(iter): + return false + default: + i.keys[x] = nil + } + return i.next() +} + +func (i *mergedIterator) prev() bool { + var key []byte + if i.dir == dirBackward { + key = i.keys[i.index] + } + for x, tkey := range i.keys { + if tkey != nil && (key == nil || i.cmp.Compare(tkey, key) > 0) { + key = tkey + i.index = x + } + } + if key == nil { + i.dir = dirSOI + return false + } + i.dir = dirBackward + return true +} + +func (i *mergedIterator) Prev() bool { + if i.dir == dirSOI || i.err != nil { + return false + } else if i.dir == dirReleased { + i.err = ErrIterReleased + return false + } + + switch i.dir { + case dirEOI: + return i.Last() + case dirForward: + key := append([]byte{}, i.keys[i.index]...) 
+ for x, iter := range i.iters { + if x == i.index { + continue + } + seek := iter.Seek(key) + switch { + case seek && iter.Prev(), !seek && iter.Last(): + i.keys[x] = assertKey(iter.Key()) + case i.iterErr(iter): + return false + default: + i.keys[x] = nil + } + } + } + + x := i.index + iter := i.iters[x] + switch { + case iter.Prev(): + i.keys[x] = assertKey(iter.Key()) + case i.iterErr(iter): + return false + default: + i.keys[x] = nil + } + return i.prev() +} + +func (i *mergedIterator) Key() []byte { + if i.err != nil || i.dir <= dirEOI { + return nil + } + return i.keys[i.index] +} + +func (i *mergedIterator) Value() []byte { + if i.err != nil || i.dir <= dirEOI { + return nil + } + return i.iters[i.index].Value() +} + +func (i *mergedIterator) Release() { + if i.dir != dirReleased { + i.dir = dirReleased + for _, iter := range i.iters { + iter.Release() + } + i.iters = nil + i.keys = nil + if i.releaser != nil { + i.releaser.Release() + i.releaser = nil + } + } +} + +func (i *mergedIterator) SetReleaser(releaser util.Releaser) { + if i.dir == dirReleased { + panic(util.ErrReleased) + } + if i.releaser != nil && releaser != nil { + panic(util.ErrHasReleaser) + } + i.releaser = releaser +} + +func (i *mergedIterator) Error() error { + return i.err +} + +func (i *mergedIterator) SetErrorCallback(f func(err error)) { + i.errf = f +} + +// NewMergedIterator returns an iterator that merges its input. Walking the +// resultant iterator will return all key/value pairs of all input iterators +// in strictly increasing key order, as defined by cmp. +// The input's key ranges may overlap, but there are assumed to be no duplicate +// keys: if iters[i] contains a key k then iters[j] will not contain that key k. +// None of the iters may be nil. +// +// If strict is true the any 'corruption errors' (i.e errors.IsCorrupted(err) == true) +// won't be ignored and will halt 'merged iterator', otherwise the iterator will +// continue to the next 'input iterator'. 
+func NewMergedIterator(iters []Iterator, cmp comparer.Comparer, strict bool) Iterator { + return &mergedIterator{ + iters: iters, + cmp: cmp, + strict: strict, + keys: make([][]byte, len(iters)), + } +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/journal/journal.go b/vendor/github.com/syndtr/goleveldb/leveldb/journal/journal.go new file mode 100644 index 0000000000..d094c3d0f8 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/journal/journal.go @@ -0,0 +1,524 @@ +// Copyright 2011 The LevelDB-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Taken from: https://code.google.com/p/leveldb-go/source/browse/leveldb/record/record.go?r=1d5ccbe03246da926391ee12d1c6caae054ff4b0 +// License, authors and contributors informations can be found at bellow URLs respectively: +// https://code.google.com/p/leveldb-go/source/browse/LICENSE +// https://code.google.com/p/leveldb-go/source/browse/AUTHORS +// https://code.google.com/p/leveldb-go/source/browse/CONTRIBUTORS + +// Package journal reads and writes sequences of journals. Each journal is a stream +// of bytes that completes before the next journal starts. +// +// When reading, call Next to obtain an io.Reader for the next journal. Next will +// return io.EOF when there are no more journals. It is valid to call Next +// without reading the current journal to exhaustion. +// +// When writing, call Next to obtain an io.Writer for the next journal. Calling +// Next finishes the current journal. Call Close to finish the final journal. +// +// Optionally, call Flush to finish the current journal and flush the underlying +// writer without starting a new journal. To start a new journal after flushing, +// call Next. +// +// Neither Readers or Writers are safe to use concurrently. 
+// +// Example code: +// func read(r io.Reader) ([]string, error) { +// var ss []string +// journals := journal.NewReader(r, nil, true, true) +// for { +// j, err := journals.Next() +// if err == io.EOF { +// break +// } +// if err != nil { +// return nil, err +// } +// s, err := ioutil.ReadAll(j) +// if err != nil { +// return nil, err +// } +// ss = append(ss, string(s)) +// } +// return ss, nil +// } +// +// func write(w io.Writer, ss []string) error { +// journals := journal.NewWriter(w) +// for _, s := range ss { +// j, err := journals.Next() +// if err != nil { +// return err +// } +// if _, err := j.Write([]byte(s)); err != nil { +// return err +// } +// } +// return journals.Close() +// } +// +// The wire format is that the stream is divided into 32 KiB blocks, and each +// block contains a number of tightly packed chunks. Chunks cannot cross block +// boundaries. The last block may be shorter than 32 KiB. Any unused bytes in a +// block must be zero. +// +// A journal maps to one or more chunks. Each chunk has a 7 byte header (a 4 +// byte checksum, a 2 byte little-endian uint16 length, and a 1 byte chunk type) +// followed by a payload. The checksum is over the chunk type and the payload. +// +// There are four chunk types: whether the chunk is the full journal, or the +// first, middle or last chunk of a multi-chunk journal. A multi-chunk journal +// has one first chunk, zero or more middle chunks, and one last chunk. +// +// The wire format allows for limited recovery in the face of data corruption: +// on a format error (such as a checksum mismatch), the reader moves to the +// next block and looks for the next full or first chunk. +package journal + +import ( + "encoding/binary" + "fmt" + "io" + + "github.com/syndtr/goleveldb/leveldb/errors" + "github.com/syndtr/goleveldb/leveldb/storage" + "github.com/syndtr/goleveldb/leveldb/util" +) + +// These constants are part of the wire format and should not be changed.
+const ( + fullChunkType = 1 + firstChunkType = 2 + middleChunkType = 3 + lastChunkType = 4 +) + +const ( + blockSize = 32 * 1024 + headerSize = 7 +) + +type flusher interface { + Flush() error +} + +// ErrCorrupted is the error type that is generated by a corrupted block or chunk. +type ErrCorrupted struct { + Size int + Reason string +} + +func (e *ErrCorrupted) Error() string { + return fmt.Sprintf("leveldb/journal: block/chunk corrupted: %s (%d bytes)", e.Reason, e.Size) +} + +// Dropper is the interface that wraps the simple Drop method. The Drop +// method will be called when the journal reader drops a block or chunk. +type Dropper interface { + Drop(err error) +} + +// Reader reads journals from an underlying io.Reader. +type Reader struct { + // r is the underlying reader. + r io.Reader + // dropper, if non-nil, receives an error for each dropped block or chunk. + dropper Dropper + // strict reports whether corruption halts the reader rather than being skipped. + strict bool + // checksum reports whether chunk checksums are verified. + checksum bool + // seq is the sequence number of the current journal. + seq int + // buf[i:j] is the unread portion of the current chunk's payload. + // The low bound, i, excludes the chunk header. + i, j int + // n is the number of bytes of buf that are valid. Once reading has started, + // only the final block can have n < blockSize. + n int + // last is whether the current chunk is the last chunk of the journal. + last bool + // err is any accumulated error. + err error + // buf is the buffer. + buf [blockSize]byte +} + +// NewReader returns a new reader. The dropper may be nil, and if +// strict is true then a corrupted or invalid chunk will halt the journal +// reader entirely.
+func NewReader(r io.Reader, dropper Dropper, strict, checksum bool) *Reader { + return &Reader{ + r: r, + dropper: dropper, + strict: strict, + checksum: checksum, + last: true, + } +} + +var errSkip = errors.New("leveldb/journal: skipped") + +func (r *Reader) corrupt(n int, reason string, skip bool) error { + if r.dropper != nil { + r.dropper.Drop(&ErrCorrupted{n, reason}) + } + if r.strict && !skip { + r.err = errors.NewErrCorrupted(storage.FileDesc{}, &ErrCorrupted{n, reason}) + return r.err + } + return errSkip +} + +// nextChunk sets r.buf[r.i:r.j] to hold the next chunk's payload, reading the +// next block into the buffer if necessary. +func (r *Reader) nextChunk(first bool) error { + for { + if r.j+headerSize <= r.n { + checksum := binary.LittleEndian.Uint32(r.buf[r.j+0 : r.j+4]) + length := binary.LittleEndian.Uint16(r.buf[r.j+4 : r.j+6]) + chunkType := r.buf[r.j+6] + unprocBlock := r.n - r.j + if checksum == 0 && length == 0 && chunkType == 0 { + // Drop entire block. + r.i = r.n + r.j = r.n + return r.corrupt(unprocBlock, "zero header", false) + } + if chunkType < fullChunkType || chunkType > lastChunkType { + // Drop entire block. + r.i = r.n + r.j = r.n + return r.corrupt(unprocBlock, fmt.Sprintf("invalid chunk type %#x", chunkType), false) + } + r.i = r.j + headerSize + r.j = r.j + headerSize + int(length) + if r.j > r.n { + // Drop entire block. + r.i = r.n + r.j = r.n + return r.corrupt(unprocBlock, "chunk length overflows block", false) + } else if r.checksum && checksum != util.NewCRC(r.buf[r.i-1:r.j]).Value() { + // Drop entire block. + r.i = r.n + r.j = r.n + return r.corrupt(unprocBlock, "checksum mismatch", false) + } + if first && chunkType != fullChunkType && chunkType != firstChunkType { + chunkLength := (r.j - r.i) + headerSize + r.i = r.j + // Report the error, but skip it. + return r.corrupt(chunkLength, "orphan chunk", true) + } + r.last = chunkType == fullChunkType || chunkType == lastChunkType + return nil + } + + // The last block. 
+ if r.n < blockSize && r.n > 0 { + if !first { + return r.corrupt(0, "missing chunk part", false) + } + r.err = io.EOF + return r.err + } + + // Read block. + n, err := io.ReadFull(r.r, r.buf[:]) + if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF { + return err + } + if n == 0 { + if !first { + return r.corrupt(0, "missing chunk part", false) + } + r.err = io.EOF + return r.err + } + r.i, r.j, r.n = 0, 0, n + } +} + +// Next returns a reader for the next journal. It returns io.EOF if there are no +// more journals. The reader returned becomes stale after the next Next call, +// and should no longer be used. If strict is false, the reader will returns +// io.ErrUnexpectedEOF error when found corrupted journal. +func (r *Reader) Next() (io.Reader, error) { + r.seq++ + if r.err != nil { + return nil, r.err + } + r.i = r.j + for { + if err := r.nextChunk(true); err == nil { + break + } else if err != errSkip { + return nil, err + } + } + return &singleReader{r, r.seq, nil}, nil +} + +// Reset resets the journal reader, allows reuse of the journal reader. Reset returns +// last accumulated error. 
+func (r *Reader) Reset(reader io.Reader, dropper Dropper, strict, checksum bool) error { + r.seq++ + err := r.err + r.r = reader + r.dropper = dropper + r.strict = strict + r.checksum = checksum + r.i = 0 + r.j = 0 + r.n = 0 + r.last = true + r.err = nil + return err +} + +type singleReader struct { + r *Reader + seq int + err error +} + +func (x *singleReader) Read(p []byte) (int, error) { + r := x.r + if r.seq != x.seq { + return 0, errors.New("leveldb/journal: stale reader") + } + if x.err != nil { + return 0, x.err + } + if r.err != nil { + return 0, r.err + } + for r.i == r.j { + if r.last { + return 0, io.EOF + } + x.err = r.nextChunk(false) + if x.err != nil { + if x.err == errSkip { + x.err = io.ErrUnexpectedEOF + } + return 0, x.err + } + } + n := copy(p, r.buf[r.i:r.j]) + r.i += n + return n, nil +} + +func (x *singleReader) ReadByte() (byte, error) { + r := x.r + if r.seq != x.seq { + return 0, errors.New("leveldb/journal: stale reader") + } + if x.err != nil { + return 0, x.err + } + if r.err != nil { + return 0, r.err + } + for r.i == r.j { + if r.last { + return 0, io.EOF + } + x.err = r.nextChunk(false) + if x.err != nil { + if x.err == errSkip { + x.err = io.ErrUnexpectedEOF + } + return 0, x.err + } + } + c := r.buf[r.i] + r.i++ + return c, nil +} + +// Writer writes journals to an underlying io.Writer. +type Writer struct { + // w is the underlying writer. + w io.Writer + // seq is the sequence number of the current journal. + seq int + // f is w as a flusher. + f flusher + // buf[i:j] is the bytes that will become the current chunk. + // The low bound, i, includes the chunk header. + i, j int + // buf[:written] has already been written to w. + // written is zero unless Flush has been called. + written int + // first is whether the current chunk is the first chunk of the journal. + first bool + // pending is whether a chunk is buffered but not yet written. + pending bool + // err is any accumulated error. + err error + // buf is the buffer. 
+ buf [blockSize]byte +} + +// NewWriter returns a new Writer. +func NewWriter(w io.Writer) *Writer { + f, _ := w.(flusher) + return &Writer{ + w: w, + f: f, + } +} + +// fillHeader fills in the header for the pending chunk. +func (w *Writer) fillHeader(last bool) { + if w.i+headerSize > w.j || w.j > blockSize { + panic("leveldb/journal: bad writer state") + } + if last { + if w.first { + w.buf[w.i+6] = fullChunkType + } else { + w.buf[w.i+6] = lastChunkType + } + } else { + if w.first { + w.buf[w.i+6] = firstChunkType + } else { + w.buf[w.i+6] = middleChunkType + } + } + binary.LittleEndian.PutUint32(w.buf[w.i+0:w.i+4], util.NewCRC(w.buf[w.i+6:w.j]).Value()) + binary.LittleEndian.PutUint16(w.buf[w.i+4:w.i+6], uint16(w.j-w.i-headerSize)) +} + +// writeBlock writes the buffered block to the underlying writer, and reserves +// space for the next chunk's header. +func (w *Writer) writeBlock() { + _, w.err = w.w.Write(w.buf[w.written:]) + w.i = 0 + w.j = headerSize + w.written = 0 +} + +// writePending finishes the current journal and writes the buffer to the +// underlying writer. +func (w *Writer) writePending() { + if w.err != nil { + return + } + if w.pending { + w.fillHeader(true) + w.pending = false + } + _, w.err = w.w.Write(w.buf[w.written:w.j]) + w.written = w.j +} + +// Close finishes the current journal and closes the writer. +func (w *Writer) Close() error { + w.seq++ + w.writePending() + if w.err != nil { + return w.err + } + w.err = errors.New("leveldb/journal: closed Writer") + return nil +} + +// Flush finishes the current journal, writes to the underlying writer, and +// flushes it if that writer implements interface{ Flush() error }. +func (w *Writer) Flush() error { + w.seq++ + w.writePending() + if w.err != nil { + return w.err + } + if w.f != nil { + w.err = w.f.Flush() + return w.err + } + return nil +} + +// Reset resets the journal writer, allows reuse of the journal writer. Reset +// will also closes the journal writer if not already. 
+func (w *Writer) Reset(writer io.Writer) (err error) { + w.seq++ + if w.err == nil { + w.writePending() + err = w.err + } + w.w = writer + w.f, _ = writer.(flusher) + w.i = 0 + w.j = 0 + w.written = 0 + w.first = false + w.pending = false + w.err = nil + return +} + +// Next returns a writer for the next journal. The writer returned becomes stale +// after the next Close, Flush or Next call, and should no longer be used. +func (w *Writer) Next() (io.Writer, error) { + w.seq++ + if w.err != nil { + return nil, w.err + } + if w.pending { + w.fillHeader(true) + } + w.i = w.j + w.j = w.j + headerSize + // Check if there is room in the block for the header. + if w.j > blockSize { + // Fill in the rest of the block with zeroes. + for k := w.i; k < blockSize; k++ { + w.buf[k] = 0 + } + w.writeBlock() + if w.err != nil { + return nil, w.err + } + } + w.first = true + w.pending = true + return singleWriter{w, w.seq}, nil +} + +type singleWriter struct { + w *Writer + seq int +} + +func (x singleWriter) Write(p []byte) (int, error) { + w := x.w + if w.seq != x.seq { + return 0, errors.New("leveldb/journal: stale writer") + } + if w.err != nil { + return 0, w.err + } + n0 := len(p) + for len(p) > 0 { + // Write a block, if it is full. + if w.j == blockSize { + w.fillHeader(false) + w.writeBlock() + if w.err != nil { + return 0, w.err + } + w.first = false + } + // Copy bytes into the buffer. + n := copy(w.buf[w.j:], p) + w.j += n + p = p[n:] + } + return n0, nil +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/key.go b/vendor/github.com/syndtr/goleveldb/leveldb/key.go new file mode 100644 index 0000000000..ad8f51ec85 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/key.go @@ -0,0 +1,143 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +package leveldb + +import ( + "encoding/binary" + "fmt" + + "github.com/syndtr/goleveldb/leveldb/errors" + "github.com/syndtr/goleveldb/leveldb/storage" +) + +// ErrInternalKeyCorrupted records internal key corruption. +type ErrInternalKeyCorrupted struct { + Ikey []byte + Reason string +} + +func (e *ErrInternalKeyCorrupted) Error() string { + return fmt.Sprintf("leveldb: internal key %q corrupted: %s", e.Ikey, e.Reason) +} + +func newErrInternalKeyCorrupted(ikey []byte, reason string) error { + return errors.NewErrCorrupted(storage.FileDesc{}, &ErrInternalKeyCorrupted{append([]byte{}, ikey...), reason}) +} + +type keyType uint + +func (kt keyType) String() string { + switch kt { + case keyTypeDel: + return "d" + case keyTypeVal: + return "v" + } + return fmt.Sprintf("<invalid:%#x>", uint(kt)) +} + +// Value types encoded as the last component of internal keys. +// Don't modify; this value are saved to disk. +const ( + keyTypeDel = keyType(0) + keyTypeVal = keyType(1) +) + +// keyTypeSeek defines the keyType that should be passed when constructing an +// internal key for seeking to a particular sequence number (since we +// sort sequence numbers in decreasing order and the value type is +// embedded as the low 8 bits in the sequence number in internal keys, +// we need to use the highest-numbered ValueType, not the lowest). +const keyTypeSeek = keyTypeVal + +const ( + // Maximum value possible for sequence number; the 8-bits are + // used by value type, so its can packed together in single + // 64-bit integer. + keyMaxSeq = (uint64(1) << 56) - 1 + // Maximum value possible for packed sequence number and type. + keyMaxNum = (keyMaxSeq << 8) | uint64(keyTypeSeek) +) + +// Maximum number encoded in bytes. 
+var keyMaxNumBytes = make([]byte, 8) + +func init() { + binary.LittleEndian.PutUint64(keyMaxNumBytes, keyMaxNum) +} + +type internalKey []byte + +func makeInternalKey(dst, ukey []byte, seq uint64, kt keyType) internalKey { + if seq > keyMaxSeq { + panic("leveldb: invalid sequence number") + } else if kt > keyTypeVal { + panic("leveldb: invalid type") + } + + dst = ensureBuffer(dst, len(ukey)+8) + copy(dst, ukey) + binary.LittleEndian.PutUint64(dst[len(ukey):], (seq<<8)|uint64(kt)) + return internalKey(dst) +} + +func parseInternalKey(ik []byte) (ukey []byte, seq uint64, kt keyType, err error) { + if len(ik) < 8 { + return nil, 0, 0, newErrInternalKeyCorrupted(ik, "invalid length") + } + num := binary.LittleEndian.Uint64(ik[len(ik)-8:]) + seq, kt = uint64(num>>8), keyType(num&0xff) + if kt > keyTypeVal { + return nil, 0, 0, newErrInternalKeyCorrupted(ik, "invalid type") + } + ukey = ik[:len(ik)-8] + return +} + +func validInternalKey(ik []byte) bool { + _, _, _, err := parseInternalKey(ik) + return err == nil +} + +func (ik internalKey) assert() { + if ik == nil { + panic("leveldb: nil internalKey") + } + if len(ik) < 8 { + panic(fmt.Sprintf("leveldb: internal key %q, len=%d: invalid length", []byte(ik), len(ik))) + } +} + +func (ik internalKey) ukey() []byte { + ik.assert() + return ik[:len(ik)-8] +} + +func (ik internalKey) num() uint64 { + ik.assert() + return binary.LittleEndian.Uint64(ik[len(ik)-8:]) +} + +func (ik internalKey) parseNum() (seq uint64, kt keyType) { + num := ik.num() + seq, kt = uint64(num>>8), keyType(num&0xff) + if kt > keyTypeVal { + panic(fmt.Sprintf("leveldb: internal key %q, len=%d: invalid type %#x", []byte(ik), len(ik), kt)) + } + return +} + +func (ik internalKey) String() string { + if ik == nil { + return "<nil>" + } + + if ukey, seq, kt, err := parseInternalKey(ik); err == nil { + return fmt.Sprintf("%s,%s%d", shorten(string(ukey)), kt, seq) + } + return fmt.Sprintf("<invalid:%#x>", []byte(ik)) +} diff --git 
a/vendor/github.com/syndtr/goleveldb/leveldb/memdb/memdb.go b/vendor/github.com/syndtr/goleveldb/leveldb/memdb/memdb.go new file mode 100644 index 0000000000..b661c08a93 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/memdb/memdb.go @@ -0,0 +1,475 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Package memdb provides in-memory key/value database implementation. +package memdb + +import ( + "math/rand" + "sync" + + "github.com/syndtr/goleveldb/leveldb/comparer" + "github.com/syndtr/goleveldb/leveldb/errors" + "github.com/syndtr/goleveldb/leveldb/iterator" + "github.com/syndtr/goleveldb/leveldb/util" +) + +// Common errors. +var ( + ErrNotFound = errors.ErrNotFound + ErrIterReleased = errors.New("leveldb/memdb: iterator released") +) + +const tMaxHeight = 12 + +type dbIter struct { + util.BasicReleaser + p *DB + slice *util.Range + node int + forward bool + key, value []byte + err error +} + +func (i *dbIter) fill(checkStart, checkLimit bool) bool { + if i.node != 0 { + n := i.p.nodeData[i.node] + m := n + i.p.nodeData[i.node+nKey] + i.key = i.p.kvData[n:m] + if i.slice != nil { + switch { + case checkLimit && i.slice.Limit != nil && i.p.cmp.Compare(i.key, i.slice.Limit) >= 0: + fallthrough + case checkStart && i.slice.Start != nil && i.p.cmp.Compare(i.key, i.slice.Start) < 0: + i.node = 0 + goto bail + } + } + i.value = i.p.kvData[m : m+i.p.nodeData[i.node+nVal]] + return true + } +bail: + i.key = nil + i.value = nil + return false +} + +func (i *dbIter) Valid() bool { + return i.node != 0 +} + +func (i *dbIter) First() bool { + if i.Released() { + i.err = ErrIterReleased + return false + } + + i.forward = true + i.p.mu.RLock() + defer i.p.mu.RUnlock() + if i.slice != nil && i.slice.Start != nil { + i.node, _ = i.p.findGE(i.slice.Start, false) + } else { + i.node = i.p.nodeData[nNext] + } + 
return i.fill(false, true) +} + +func (i *dbIter) Last() bool { + if i.Released() { + i.err = ErrIterReleased + return false + } + + i.forward = false + i.p.mu.RLock() + defer i.p.mu.RUnlock() + if i.slice != nil && i.slice.Limit != nil { + i.node = i.p.findLT(i.slice.Limit) + } else { + i.node = i.p.findLast() + } + return i.fill(true, false) +} + +func (i *dbIter) Seek(key []byte) bool { + if i.Released() { + i.err = ErrIterReleased + return false + } + + i.forward = true + i.p.mu.RLock() + defer i.p.mu.RUnlock() + if i.slice != nil && i.slice.Start != nil && i.p.cmp.Compare(key, i.slice.Start) < 0 { + key = i.slice.Start + } + i.node, _ = i.p.findGE(key, false) + return i.fill(false, true) +} + +func (i *dbIter) Next() bool { + if i.Released() { + i.err = ErrIterReleased + return false + } + + if i.node == 0 { + if !i.forward { + return i.First() + } + return false + } + i.forward = true + i.p.mu.RLock() + defer i.p.mu.RUnlock() + i.node = i.p.nodeData[i.node+nNext] + return i.fill(false, true) +} + +func (i *dbIter) Prev() bool { + if i.Released() { + i.err = ErrIterReleased + return false + } + + if i.node == 0 { + if i.forward { + return i.Last() + } + return false + } + i.forward = false + i.p.mu.RLock() + defer i.p.mu.RUnlock() + i.node = i.p.findLT(i.key) + return i.fill(true, false) +} + +func (i *dbIter) Key() []byte { + return i.key +} + +func (i *dbIter) Value() []byte { + return i.value +} + +func (i *dbIter) Error() error { return i.err } + +func (i *dbIter) Release() { + if !i.Released() { + i.p = nil + i.node = 0 + i.key = nil + i.value = nil + i.BasicReleaser.Release() + } +} + +const ( + nKV = iota + nKey + nVal + nHeight + nNext +) + +// DB is an in-memory key/value database. 
+type DB struct { + cmp comparer.BasicComparer + rnd *rand.Rand + + mu sync.RWMutex + kvData []byte + // Node data: + // [0] : KV offset + // [1] : Key length + // [2] : Value length + // [3] : Height + // [3..height] : Next nodes + nodeData []int + prevNode [tMaxHeight]int + maxHeight int + n int + kvSize int +} + +func (p *DB) randHeight() (h int) { + const branching = 4 + h = 1 + for h < tMaxHeight && p.rnd.Int()%branching == 0 { + h++ + } + return +} + +// Must hold RW-lock if prev == true, as it use shared prevNode slice. +func (p *DB) findGE(key []byte, prev bool) (int, bool) { + node := 0 + h := p.maxHeight - 1 + for { + next := p.nodeData[node+nNext+h] + cmp := 1 + if next != 0 { + o := p.nodeData[next] + cmp = p.cmp.Compare(p.kvData[o:o+p.nodeData[next+nKey]], key) + } + if cmp < 0 { + // Keep searching in this list + node = next + } else { + if prev { + p.prevNode[h] = node + } else if cmp == 0 { + return next, true + } + if h == 0 { + return next, cmp == 0 + } + h-- + } + } +} + +func (p *DB) findLT(key []byte) int { + node := 0 + h := p.maxHeight - 1 + for { + next := p.nodeData[node+nNext+h] + o := p.nodeData[next] + if next == 0 || p.cmp.Compare(p.kvData[o:o+p.nodeData[next+nKey]], key) >= 0 { + if h == 0 { + break + } + h-- + } else { + node = next + } + } + return node +} + +func (p *DB) findLast() int { + node := 0 + h := p.maxHeight - 1 + for { + next := p.nodeData[node+nNext+h] + if next == 0 { + if h == 0 { + break + } + h-- + } else { + node = next + } + } + return node +} + +// Put sets the value for the given key. It overwrites any previous value +// for that key; a DB is not a multi-map. +// +// It is safe to modify the contents of the arguments after Put returns. +func (p *DB) Put(key []byte, value []byte) error { + p.mu.Lock() + defer p.mu.Unlock() + + if node, exact := p.findGE(key, true); exact { + kvOffset := len(p.kvData) + p.kvData = append(p.kvData, key...) + p.kvData = append(p.kvData, value...) 
+ p.nodeData[node] = kvOffset + m := p.nodeData[node+nVal] + p.nodeData[node+nVal] = len(value) + p.kvSize += len(value) - m + return nil + } + + h := p.randHeight() + if h > p.maxHeight { + for i := p.maxHeight; i < h; i++ { + p.prevNode[i] = 0 + } + p.maxHeight = h + } + + kvOffset := len(p.kvData) + p.kvData = append(p.kvData, key...) + p.kvData = append(p.kvData, value...) + // Node + node := len(p.nodeData) + p.nodeData = append(p.nodeData, kvOffset, len(key), len(value), h) + for i, n := range p.prevNode[:h] { + m := n + nNext + i + p.nodeData = append(p.nodeData, p.nodeData[m]) + p.nodeData[m] = node + } + + p.kvSize += len(key) + len(value) + p.n++ + return nil +} + +// Delete deletes the value for the given key. It returns ErrNotFound if +// the DB does not contain the key. +// +// It is safe to modify the contents of the arguments after Delete returns. +func (p *DB) Delete(key []byte) error { + p.mu.Lock() + defer p.mu.Unlock() + + node, exact := p.findGE(key, true) + if !exact { + return ErrNotFound + } + + h := p.nodeData[node+nHeight] + for i, n := range p.prevNode[:h] { + m := n + nNext + i + p.nodeData[m] = p.nodeData[p.nodeData[m]+nNext+i] + } + + p.kvSize -= p.nodeData[node+nKey] + p.nodeData[node+nVal] + p.n-- + return nil +} + +// Contains returns true if the given key are in the DB. +// +// It is safe to modify the contents of the arguments after Contains returns. +func (p *DB) Contains(key []byte) bool { + p.mu.RLock() + _, exact := p.findGE(key, false) + p.mu.RUnlock() + return exact +} + +// Get gets the value for the given key. It returns error.ErrNotFound if the +// DB does not contain the key. +// +// The caller should not modify the contents of the returned slice, but +// it is safe to modify the contents of the argument after Get returns. 
+func (p *DB) Get(key []byte) (value []byte, err error) { + p.mu.RLock() + if node, exact := p.findGE(key, false); exact { + o := p.nodeData[node] + p.nodeData[node+nKey] + value = p.kvData[o : o+p.nodeData[node+nVal]] + } else { + err = ErrNotFound + } + p.mu.RUnlock() + return +} + +// Find finds key/value pair whose key is greater than or equal to the +// given key. It returns ErrNotFound if the table doesn't contain +// such pair. +// +// The caller should not modify the contents of the returned slice, but +// it is safe to modify the contents of the argument after Find returns. +func (p *DB) Find(key []byte) (rkey, value []byte, err error) { + p.mu.RLock() + if node, _ := p.findGE(key, false); node != 0 { + n := p.nodeData[node] + m := n + p.nodeData[node+nKey] + rkey = p.kvData[n:m] + value = p.kvData[m : m+p.nodeData[node+nVal]] + } else { + err = ErrNotFound + } + p.mu.RUnlock() + return +} + +// NewIterator returns an iterator of the DB. +// The returned iterator is not safe for concurrent use, but it is safe to use +// multiple iterators concurrently, with each in a dedicated goroutine. +// It is also safe to use an iterator concurrently with modifying its +// underlying DB. However, the resultant key/value pairs are not guaranteed +// to be a consistent snapshot of the DB at a particular point in time. +// +// Slice allows slicing the iterator to only contains keys in the given +// range. A nil Range.Start is treated as a key before all keys in the +// DB. And a nil Range.Limit is treated as a key after all keys in +// the DB. +// +// The iterator must be released after use, by calling Release method. +// +// Also read Iterator documentation of the leveldb/iterator package. +func (p *DB) NewIterator(slice *util.Range) iterator.Iterator { + return &dbIter{p: p, slice: slice} +} + +// Capacity returns keys/values buffer capacity. 
+func (p *DB) Capacity() int { + p.mu.RLock() + defer p.mu.RUnlock() + return cap(p.kvData) +} + +// Size returns sum of keys and values length. Note that deleted +// key/value will not be accounted for, but it will still consume +// the buffer, since the buffer is append only. +func (p *DB) Size() int { + p.mu.RLock() + defer p.mu.RUnlock() + return p.kvSize +} + +// Free returns keys/values free buffer before need to grow. +func (p *DB) Free() int { + p.mu.RLock() + defer p.mu.RUnlock() + return cap(p.kvData) - len(p.kvData) +} + +// Len returns the number of entries in the DB. +func (p *DB) Len() int { + p.mu.RLock() + defer p.mu.RUnlock() + return p.n +} + +// Reset resets the DB to initial empty state. Allows reuse the buffer. +func (p *DB) Reset() { + p.mu.Lock() + p.rnd = rand.New(rand.NewSource(0xdeadbeef)) + p.maxHeight = 1 + p.n = 0 + p.kvSize = 0 + p.kvData = p.kvData[:0] + p.nodeData = p.nodeData[:nNext+tMaxHeight] + p.nodeData[nKV] = 0 + p.nodeData[nKey] = 0 + p.nodeData[nVal] = 0 + p.nodeData[nHeight] = tMaxHeight + for n := 0; n < tMaxHeight; n++ { + p.nodeData[nNext+n] = 0 + p.prevNode[n] = 0 + } + p.mu.Unlock() +} + +// New creates a new initialized in-memory key/value DB. The capacity +// is the initial key/value buffer capacity. The capacity is advisory, +// not enforced. +// +// This DB is append-only, deleting an entry would remove entry node but not +// reclaim KV buffer. +// +// The returned DB instance is safe for concurrent use. 
+func New(cmp comparer.BasicComparer, capacity int) *DB { + p := &DB{ + cmp: cmp, + rnd: rand.New(rand.NewSource(0xdeadbeef)), + maxHeight: 1, + kvData: make([]byte, 0, capacity), + nodeData: make([]int, 4+tMaxHeight), + } + p.nodeData[nHeight] = tMaxHeight + return p +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/opt/options.go b/vendor/github.com/syndtr/goleveldb/leveldb/opt/options.go new file mode 100644 index 0000000000..528b164233 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/opt/options.go @@ -0,0 +1,697 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Package opt provides sets of options used by LevelDB. +package opt + +import ( + "math" + + "github.com/syndtr/goleveldb/leveldb/cache" + "github.com/syndtr/goleveldb/leveldb/comparer" + "github.com/syndtr/goleveldb/leveldb/filter" +) + +const ( + KiB = 1024 + MiB = KiB * 1024 + GiB = MiB * 1024 +) + +var ( + DefaultBlockCacher = LRUCacher + DefaultBlockCacheCapacity = 8 * MiB + DefaultBlockRestartInterval = 16 + DefaultBlockSize = 4 * KiB + DefaultCompactionExpandLimitFactor = 25 + DefaultCompactionGPOverlapsFactor = 10 + DefaultCompactionL0Trigger = 4 + DefaultCompactionSourceLimitFactor = 1 + DefaultCompactionTableSize = 2 * MiB + DefaultCompactionTableSizeMultiplier = 1.0 + DefaultCompactionTotalSize = 10 * MiB + DefaultCompactionTotalSizeMultiplier = 10.0 + DefaultCompressionType = SnappyCompression + DefaultIteratorSamplingRate = 1 * MiB + DefaultOpenFilesCacher = LRUCacher + DefaultOpenFilesCacheCapacity = 500 + DefaultWriteBuffer = 4 * MiB + DefaultWriteL0PauseTrigger = 12 + DefaultWriteL0SlowdownTrigger = 8 +) + +// Cacher is a caching algorithm. 
+type Cacher interface { + New(capacity int) cache.Cacher +} + +type CacherFunc struct { + NewFunc func(capacity int) cache.Cacher +} + +func (f *CacherFunc) New(capacity int) cache.Cacher { + if f.NewFunc != nil { + return f.NewFunc(capacity) + } + return nil +} + +func noCacher(int) cache.Cacher { return nil } + +var ( + // LRUCacher is the LRU-cache algorithm. + LRUCacher = &CacherFunc{cache.NewLRU} + + // NoCacher is the value to disable caching algorithm. + NoCacher = &CacherFunc{} +) + +// Compression is the 'sorted table' block compression algorithm to use. +type Compression uint + +func (c Compression) String() string { + switch c { + case DefaultCompression: + return "default" + case NoCompression: + return "none" + case SnappyCompression: + return "snappy" + } + return "invalid" +} + +const ( + DefaultCompression Compression = iota + NoCompression + SnappyCompression + nCompression +) + +// Strict is the DB 'strict level'. +type Strict uint + +const ( + // If present then a corrupted or invalid chunk or block in manifest + // journal will cause an error instead of being dropped. + // This will prevent database with corrupted manifest to be opened. + StrictManifest Strict = 1 << iota + + // If present then journal chunk checksum will be verified. + StrictJournalChecksum + + // If present then a corrupted or invalid chunk or block in journal + // will cause an error instead of being dropped. + // This will prevent database with corrupted journal to be opened. + StrictJournal + + // If present then 'sorted table' block checksum will be verified. + // This has effect on both 'read operation' and compaction. + StrictBlockChecksum + + // If present then a corrupted 'sorted table' will fails compaction. + // The database will enter read-only mode. + StrictCompaction + + // If present then a corrupted 'sorted table' will halts 'read operation'. + StrictReader + + // If present then leveldb.Recover will drop corrupted 'sorted table'. 
+ StrictRecovery + + // This only applicable for ReadOptions, if present then this ReadOptions + // 'strict level' will override global ones. + StrictOverride + + // StrictAll enables all strict flags. + StrictAll = StrictManifest | StrictJournalChecksum | StrictJournal | StrictBlockChecksum | StrictCompaction | StrictReader | StrictRecovery + + // DefaultStrict is the default strict flags. Specify any strict flags + // will override default strict flags as whole (i.e. not OR'ed). + DefaultStrict = StrictJournalChecksum | StrictBlockChecksum | StrictCompaction | StrictReader + + // NoStrict disables all strict flags. Override default strict flags. + NoStrict = ^StrictAll +) + +// Options holds the optional parameters for the DB at large. +type Options struct { + // AltFilters defines one or more 'alternative filters'. + // 'alternative filters' will be used during reads if a filter block + // does not match with the 'effective filter'. + // + // The default value is nil + AltFilters []filter.Filter + + // BlockCacher provides cache algorithm for LevelDB 'sorted table' block caching. + // Specify NoCacher to disable caching algorithm. + // + // The default value is LRUCacher. + BlockCacher Cacher + + // BlockCacheCapacity defines the capacity of the 'sorted table' block caching. + // Use -1 for zero, this has same effect as specifying NoCacher to BlockCacher. + // + // The default value is 8MiB. + BlockCacheCapacity int + + // BlockCacheEvictRemoved allows enable forced-eviction on cached block belonging + // to removed 'sorted table'. + // + // The default if false. + BlockCacheEvictRemoved bool + + // BlockRestartInterval is the number of keys between restart points for + // delta encoding of keys. + // + // The default value is 16. + BlockRestartInterval int + + // BlockSize is the minimum uncompressed size in bytes of each 'sorted table' + // block. + // + // The default value is 4KiB. 
+ BlockSize int + + // CompactionExpandLimitFactor limits compaction size after expanded. + // This will be multiplied by table size limit at compaction target level. + // + // The default value is 25. + CompactionExpandLimitFactor int + + // CompactionGPOverlapsFactor limits overlaps in grandparent (Level + 2) that a + // single 'sorted table' generates. + // This will be multiplied by table size limit at grandparent level. + // + // The default value is 10. + CompactionGPOverlapsFactor int + + // CompactionL0Trigger defines number of 'sorted table' at level-0 that will + // trigger compaction. + // + // The default value is 4. + CompactionL0Trigger int + + // CompactionSourceLimitFactor limits compaction source size. This doesn't apply to + // level-0. + // This will be multiplied by table size limit at compaction target level. + // + // The default value is 1. + CompactionSourceLimitFactor int + + // CompactionTableSize limits size of 'sorted table' that compaction generates. + // The limits for each level will be calculated as: + // CompactionTableSize * (CompactionTableSizeMultiplier ^ Level) + // The multiplier for each level can also fine-tuned using CompactionTableSizeMultiplierPerLevel. + // + // The default value is 2MiB. + CompactionTableSize int + + // CompactionTableSizeMultiplier defines multiplier for CompactionTableSize. + // + // The default value is 1. + CompactionTableSizeMultiplier float64 + + // CompactionTableSizeMultiplierPerLevel defines per-level multiplier for + // CompactionTableSize. + // Use zero to skip a level. + // + // The default value is nil. + CompactionTableSizeMultiplierPerLevel []float64 + + // CompactionTotalSize limits total size of 'sorted table' for each level. + // The limits for each level will be calculated as: + // CompactionTotalSize * (CompactionTotalSizeMultiplier ^ Level) + // The multiplier for each level can also fine-tuned using + // CompactionTotalSizeMultiplierPerLevel. + // + // The default value is 10MiB. 
+ CompactionTotalSize int + + // CompactionTotalSizeMultiplier defines multiplier for CompactionTotalSize. + // + // The default value is 10. + CompactionTotalSizeMultiplier float64 + + // CompactionTotalSizeMultiplierPerLevel defines per-level multiplier for + // CompactionTotalSize. + // Use zero to skip a level. + // + // The default value is nil. + CompactionTotalSizeMultiplierPerLevel []float64 + + // Comparer defines a total ordering over the space of []byte keys: a 'less + // than' relationship. The same comparison algorithm must be used for reads + // and writes over the lifetime of the DB. + // + // The default value uses the same ordering as bytes.Compare. + Comparer comparer.Comparer + + // Compression defines the 'sorted table' block compression to use. + // + // The default value (DefaultCompression) uses snappy compression. + Compression Compression + + // DisableBufferPool allows disable use of util.BufferPool functionality. + // + // The default value is false. + DisableBufferPool bool + + // DisableBlockCache allows disable use of cache.Cache functionality on + // 'sorted table' block. + // + // The default value is false. + DisableBlockCache bool + + // DisableCompactionBackoff allows disable compaction retry backoff. + // + // The default value is false. + DisableCompactionBackoff bool + + // DisableLargeBatchTransaction allows disabling switch-to-transaction mode + // on large batch write. If enable batch writes large than WriteBuffer will + // use transaction. + // + // The default is false. + DisableLargeBatchTransaction bool + + // ErrorIfExist defines whether an error should returned if the DB already + // exist. + // + // The default value is false. + ErrorIfExist bool + + // ErrorIfMissing defines whether an error should returned if the DB is + // missing. If false then the database will be created if missing, otherwise + // an error will be returned. + // + // The default value is false. 
+ ErrorIfMissing bool + + // Filter defines an 'effective filter' to use. An 'effective filter' + // if defined will be used to generate per-table filter block. + // The filter name will be stored on disk. + // During reads LevelDB will try to find matching filter from + // 'effective filter' and 'alternative filters'. + // + // Filter can be changed after a DB has been created. It is recommended + // to put old filter to the 'alternative filters' to mitigate lack of + // filter during transition period. + // + // A filter is used to reduce disk reads when looking for a specific key. + // + // The default value is nil. + Filter filter.Filter + + // IteratorSamplingRate defines approximate gap (in bytes) between read + // sampling of an iterator. The samples will be used to determine when + // compaction should be triggered. + // + // The default is 1MiB. + IteratorSamplingRate int + + // NoSync allows completely disable fsync. + // + // The default is false. + NoSync bool + + // NoWriteMerge allows disabling write merge. + // + // The default is false. + NoWriteMerge bool + + // OpenFilesCacher provides cache algorithm for open files caching. + // Specify NoCacher to disable caching algorithm. + // + // The default value is LRUCacher. + OpenFilesCacher Cacher + + // OpenFilesCacheCapacity defines the capacity of the open files caching. + // Use -1 for zero, this has same effect as specifying NoCacher to OpenFilesCacher. + // + // The default value is 500. + OpenFilesCacheCapacity int + + // If true then opens DB in read-only mode. + // + // The default value is false. + ReadOnly bool + + // Strict defines the DB strict level. + Strict Strict + + // WriteBuffer defines maximum size of a 'memdb' before flushed to + // 'sorted table'. 'memdb' is an in-memory DB backed by an on-disk + // unsorted journal. + // + // LevelDB may held up to two 'memdb' at the same time. + // + // The default value is 4MiB. 
+ WriteBuffer int + + // WriteL0StopTrigger defines number of 'sorted table' at level-0 that will + // pause write. + // + // The default value is 12. + WriteL0PauseTrigger int + + // WriteL0SlowdownTrigger defines number of 'sorted table' at level-0 that + // will trigger write slowdown. + // + // The default value is 8. + WriteL0SlowdownTrigger int +} + +func (o *Options) GetAltFilters() []filter.Filter { + if o == nil { + return nil + } + return o.AltFilters +} + +func (o *Options) GetBlockCacher() Cacher { + if o == nil || o.BlockCacher == nil { + return DefaultBlockCacher + } else if o.BlockCacher == NoCacher { + return nil + } + return o.BlockCacher +} + +func (o *Options) GetBlockCacheCapacity() int { + if o == nil || o.BlockCacheCapacity == 0 { + return DefaultBlockCacheCapacity + } else if o.BlockCacheCapacity < 0 { + return 0 + } + return o.BlockCacheCapacity +} + +func (o *Options) GetBlockCacheEvictRemoved() bool { + if o == nil { + return false + } + return o.BlockCacheEvictRemoved +} + +func (o *Options) GetBlockRestartInterval() int { + if o == nil || o.BlockRestartInterval <= 0 { + return DefaultBlockRestartInterval + } + return o.BlockRestartInterval +} + +func (o *Options) GetBlockSize() int { + if o == nil || o.BlockSize <= 0 { + return DefaultBlockSize + } + return o.BlockSize +} + +func (o *Options) GetCompactionExpandLimit(level int) int { + factor := DefaultCompactionExpandLimitFactor + if o != nil && o.CompactionExpandLimitFactor > 0 { + factor = o.CompactionExpandLimitFactor + } + return o.GetCompactionTableSize(level+1) * factor +} + +func (o *Options) GetCompactionGPOverlaps(level int) int { + factor := DefaultCompactionGPOverlapsFactor + if o != nil && o.CompactionGPOverlapsFactor > 0 { + factor = o.CompactionGPOverlapsFactor + } + return o.GetCompactionTableSize(level+2) * factor +} + +func (o *Options) GetCompactionL0Trigger() int { + if o == nil || o.CompactionL0Trigger == 0 { + return DefaultCompactionL0Trigger + } + return 
o.CompactionL0Trigger +} + +func (o *Options) GetCompactionSourceLimit(level int) int { + factor := DefaultCompactionSourceLimitFactor + if o != nil && o.CompactionSourceLimitFactor > 0 { + factor = o.CompactionSourceLimitFactor + } + return o.GetCompactionTableSize(level+1) * factor +} + +func (o *Options) GetCompactionTableSize(level int) int { + var ( + base = DefaultCompactionTableSize + mult float64 + ) + if o != nil { + if o.CompactionTableSize > 0 { + base = o.CompactionTableSize + } + if level < len(o.CompactionTableSizeMultiplierPerLevel) && o.CompactionTableSizeMultiplierPerLevel[level] > 0 { + mult = o.CompactionTableSizeMultiplierPerLevel[level] + } else if o.CompactionTableSizeMultiplier > 0 { + mult = math.Pow(o.CompactionTableSizeMultiplier, float64(level)) + } + } + if mult == 0 { + mult = math.Pow(DefaultCompactionTableSizeMultiplier, float64(level)) + } + return int(float64(base) * mult) +} + +func (o *Options) GetCompactionTotalSize(level int) int64 { + var ( + base = DefaultCompactionTotalSize + mult float64 + ) + if o != nil { + if o.CompactionTotalSize > 0 { + base = o.CompactionTotalSize + } + if level < len(o.CompactionTotalSizeMultiplierPerLevel) && o.CompactionTotalSizeMultiplierPerLevel[level] > 0 { + mult = o.CompactionTotalSizeMultiplierPerLevel[level] + } else if o.CompactionTotalSizeMultiplier > 0 { + mult = math.Pow(o.CompactionTotalSizeMultiplier, float64(level)) + } + } + if mult == 0 { + mult = math.Pow(DefaultCompactionTotalSizeMultiplier, float64(level)) + } + return int64(float64(base) * mult) +} + +func (o *Options) GetComparer() comparer.Comparer { + if o == nil || o.Comparer == nil { + return comparer.DefaultComparer + } + return o.Comparer +} + +func (o *Options) GetCompression() Compression { + if o == nil || o.Compression <= DefaultCompression || o.Compression >= nCompression { + return DefaultCompressionType + } + return o.Compression +} + +func (o *Options) GetDisableBufferPool() bool { + if o == nil { + return false + 
} + return o.DisableBufferPool +} + +func (o *Options) GetDisableBlockCache() bool { + if o == nil { + return false + } + return o.DisableBlockCache +} + +func (o *Options) GetDisableCompactionBackoff() bool { + if o == nil { + return false + } + return o.DisableCompactionBackoff +} + +func (o *Options) GetDisableLargeBatchTransaction() bool { + if o == nil { + return false + } + return o.DisableLargeBatchTransaction +} + +func (o *Options) GetErrorIfExist() bool { + if o == nil { + return false + } + return o.ErrorIfExist +} + +func (o *Options) GetErrorIfMissing() bool { + if o == nil { + return false + } + return o.ErrorIfMissing +} + +func (o *Options) GetFilter() filter.Filter { + if o == nil { + return nil + } + return o.Filter +} + +func (o *Options) GetIteratorSamplingRate() int { + if o == nil || o.IteratorSamplingRate <= 0 { + return DefaultIteratorSamplingRate + } + return o.IteratorSamplingRate +} + +func (o *Options) GetNoSync() bool { + if o == nil { + return false + } + return o.NoSync +} + +func (o *Options) GetNoWriteMerge() bool { + if o == nil { + return false + } + return o.NoWriteMerge +} + +func (o *Options) GetOpenFilesCacher() Cacher { + if o == nil || o.OpenFilesCacher == nil { + return DefaultOpenFilesCacher + } + if o.OpenFilesCacher == NoCacher { + return nil + } + return o.OpenFilesCacher +} + +func (o *Options) GetOpenFilesCacheCapacity() int { + if o == nil || o.OpenFilesCacheCapacity == 0 { + return DefaultOpenFilesCacheCapacity + } else if o.OpenFilesCacheCapacity < 0 { + return 0 + } + return o.OpenFilesCacheCapacity +} + +func (o *Options) GetReadOnly() bool { + if o == nil { + return false + } + return o.ReadOnly +} + +func (o *Options) GetStrict(strict Strict) bool { + if o == nil || o.Strict == 0 { + return DefaultStrict&strict != 0 + } + return o.Strict&strict != 0 +} + +func (o *Options) GetWriteBuffer() int { + if o == nil || o.WriteBuffer <= 0 { + return DefaultWriteBuffer + } + return o.WriteBuffer +} + +func (o *Options) 
GetWriteL0PauseTrigger() int { + if o == nil || o.WriteL0PauseTrigger == 0 { + return DefaultWriteL0PauseTrigger + } + return o.WriteL0PauseTrigger +} + +func (o *Options) GetWriteL0SlowdownTrigger() int { + if o == nil || o.WriteL0SlowdownTrigger == 0 { + return DefaultWriteL0SlowdownTrigger + } + return o.WriteL0SlowdownTrigger +} + +// ReadOptions holds the optional parameters for 'read operation'. The +// 'read operation' includes Get, Find and NewIterator. +type ReadOptions struct { + // DontFillCache defines whether block reads for this 'read operation' + // should be cached. If false then the block will be cached. This does + // not affects already cached block. + // + // The default value is false. + DontFillCache bool + + // Strict will be OR'ed with global DB 'strict level' unless StrictOverride + // is present. Currently only StrictReader that has effect here. + Strict Strict +} + +func (ro *ReadOptions) GetDontFillCache() bool { + if ro == nil { + return false + } + return ro.DontFillCache +} + +func (ro *ReadOptions) GetStrict(strict Strict) bool { + if ro == nil { + return false + } + return ro.Strict&strict != 0 +} + +// WriteOptions holds the optional parameters for 'write operation'. The +// 'write operation' includes Write, Put and Delete. +type WriteOptions struct { + // NoWriteMerge allows disabling write merge. + // + // The default is false. + NoWriteMerge bool + + // Sync is whether to sync underlying writes from the OS buffer cache + // through to actual disk, if applicable. Setting Sync can result in + // slower writes. + // + // If false, and the machine crashes, then some recent writes may be lost. + // Note that if it is just the process that crashes (and the machine does + // not) then no writes will be lost. + // + // In other words, Sync being false has the same semantics as a write + // system call. Sync being true means write followed by fsync. + // + // The default value is false. 
+ Sync bool +} + +func (wo *WriteOptions) GetNoWriteMerge() bool { + if wo == nil { + return false + } + return wo.NoWriteMerge +} + +func (wo *WriteOptions) GetSync() bool { + if wo == nil { + return false + } + return wo.Sync +} + +func GetStrict(o *Options, ro *ReadOptions, strict Strict) bool { + if ro.GetStrict(StrictOverride) { + return ro.GetStrict(strict) + } else { + return o.GetStrict(strict) || ro.GetStrict(strict) + } +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/options.go b/vendor/github.com/syndtr/goleveldb/leveldb/options.go new file mode 100644 index 0000000000..b072b1ac4c --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/options.go @@ -0,0 +1,107 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "github.com/syndtr/goleveldb/leveldb/filter" + "github.com/syndtr/goleveldb/leveldb/opt" +) + +func dupOptions(o *opt.Options) *opt.Options { + newo := &opt.Options{} + if o != nil { + *newo = *o + } + if newo.Strict == 0 { + newo.Strict = opt.DefaultStrict + } + return newo +} + +func (s *session) setOptions(o *opt.Options) { + no := dupOptions(o) + // Alternative filters. + if filters := o.GetAltFilters(); len(filters) > 0 { + no.AltFilters = make([]filter.Filter, len(filters)) + for i, filter := range filters { + no.AltFilters[i] = &iFilter{filter} + } + } + // Comparer. + s.icmp = &iComparer{o.GetComparer()} + no.Comparer = s.icmp + // Filter. 
+ if filter := o.GetFilter(); filter != nil { + no.Filter = &iFilter{filter} + } + + s.o = &cachedOptions{Options: no} + s.o.cache() +} + +const optCachedLevel = 7 + +type cachedOptions struct { + *opt.Options + + compactionExpandLimit []int + compactionGPOverlaps []int + compactionSourceLimit []int + compactionTableSize []int + compactionTotalSize []int64 +} + +func (co *cachedOptions) cache() { + co.compactionExpandLimit = make([]int, optCachedLevel) + co.compactionGPOverlaps = make([]int, optCachedLevel) + co.compactionSourceLimit = make([]int, optCachedLevel) + co.compactionTableSize = make([]int, optCachedLevel) + co.compactionTotalSize = make([]int64, optCachedLevel) + + for level := 0; level < optCachedLevel; level++ { + co.compactionExpandLimit[level] = co.Options.GetCompactionExpandLimit(level) + co.compactionGPOverlaps[level] = co.Options.GetCompactionGPOverlaps(level) + co.compactionSourceLimit[level] = co.Options.GetCompactionSourceLimit(level) + co.compactionTableSize[level] = co.Options.GetCompactionTableSize(level) + co.compactionTotalSize[level] = co.Options.GetCompactionTotalSize(level) + } +} + +func (co *cachedOptions) GetCompactionExpandLimit(level int) int { + if level < optCachedLevel { + return co.compactionExpandLimit[level] + } + return co.Options.GetCompactionExpandLimit(level) +} + +func (co *cachedOptions) GetCompactionGPOverlaps(level int) int { + if level < optCachedLevel { + return co.compactionGPOverlaps[level] + } + return co.Options.GetCompactionGPOverlaps(level) +} + +func (co *cachedOptions) GetCompactionSourceLimit(level int) int { + if level < optCachedLevel { + return co.compactionSourceLimit[level] + } + return co.Options.GetCompactionSourceLimit(level) +} + +func (co *cachedOptions) GetCompactionTableSize(level int) int { + if level < optCachedLevel { + return co.compactionTableSize[level] + } + return co.Options.GetCompactionTableSize(level) +} + +func (co *cachedOptions) GetCompactionTotalSize(level int) int64 { + if level 
< optCachedLevel { + return co.compactionTotalSize[level] + } + return co.Options.GetCompactionTotalSize(level) +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/session.go b/vendor/github.com/syndtr/goleveldb/leveldb/session.go new file mode 100644 index 0000000000..3f391f9346 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/session.go @@ -0,0 +1,210 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "fmt" + "io" + "os" + "sync" + + "github.com/syndtr/goleveldb/leveldb/errors" + "github.com/syndtr/goleveldb/leveldb/journal" + "github.com/syndtr/goleveldb/leveldb/opt" + "github.com/syndtr/goleveldb/leveldb/storage" +) + +// ErrManifestCorrupted records manifest corruption. This error will be +// wrapped with errors.ErrCorrupted. +type ErrManifestCorrupted struct { + Field string + Reason string +} + +func (e *ErrManifestCorrupted) Error() string { + return fmt.Sprintf("leveldb: manifest corrupted (field '%s'): %s", e.Field, e.Reason) +} + +func newErrManifestCorrupted(fd storage.FileDesc, field, reason string) error { + return errors.NewErrCorrupted(fd, &ErrManifestCorrupted{field, reason}) +} + +// session represent a persistent database session. +type session struct { + // Need 64-bit alignment. 
+ stNextFileNum int64 // current unused file number + stJournalNum int64 // current journal file number; need external synchronization + stPrevJournalNum int64 // prev journal file number; no longer used; for compatibility with older version of leveldb + stTempFileNum int64 + stSeqNum uint64 // last mem compacted seq; need external synchronization + + stor *iStorage + storLock storage.Locker + o *cachedOptions + icmp *iComparer + tops *tOps + fileRef map[int64]int + + manifest *journal.Writer + manifestWriter storage.Writer + manifestFd storage.FileDesc + + stCompPtrs []internalKey // compaction pointers; need external synchronization + stVersion *version // current version + vmu sync.Mutex +} + +// Creates new initialized session instance. +func newSession(stor storage.Storage, o *opt.Options) (s *session, err error) { + if stor == nil { + return nil, os.ErrInvalid + } + storLock, err := stor.Lock() + if err != nil { + return + } + s = &session{ + stor: newIStorage(stor), + storLock: storLock, + fileRef: make(map[int64]int), + } + s.setOptions(o) + s.tops = newTableOps(s) + s.setVersion(newVersion(s)) + s.log("log@legend F·NumFile S·FileSize N·Entry C·BadEntry B·BadBlock Ke·KeyError D·DroppedEntry L·Level Q·SeqNum T·TimeElapsed") + return +} + +// Close session. +func (s *session) close() { + s.tops.close() + if s.manifest != nil { + s.manifest.Close() + } + if s.manifestWriter != nil { + s.manifestWriter.Close() + } + s.manifest = nil + s.manifestWriter = nil + s.setVersion(&version{s: s, closing: true}) +} + +// Release session lock. +func (s *session) release() { + s.storLock.Unlock() +} + +// Create a new database session; need external synchronization. +func (s *session) create() error { + // create manifest + return s.newManifest(nil, nil) +} + +// Recover a database session; need external synchronization. 
+func (s *session) recover() (err error) { + defer func() { + if os.IsNotExist(err) { + // Don't return os.ErrNotExist if the underlying storage contains + // other files that belong to LevelDB. So the DB won't get trashed. + if fds, _ := s.stor.List(storage.TypeAll); len(fds) > 0 { + err = &errors.ErrCorrupted{Fd: storage.FileDesc{Type: storage.TypeManifest}, Err: &errors.ErrMissingFiles{}} + } + } + }() + + fd, err := s.stor.GetMeta() + if err != nil { + return + } + + reader, err := s.stor.Open(fd) + if err != nil { + return + } + defer reader.Close() + + var ( + // Options. + strict = s.o.GetStrict(opt.StrictManifest) + + jr = journal.NewReader(reader, dropper{s, fd}, strict, true) + rec = &sessionRecord{} + staging = s.stVersion.newStaging() + ) + for { + var r io.Reader + r, err = jr.Next() + if err != nil { + if err == io.EOF { + err = nil + break + } + return errors.SetFd(err, fd) + } + + err = rec.decode(r) + if err == nil { + // save compact pointers + for _, r := range rec.compPtrs { + s.setCompPtr(r.level, internalKey(r.ikey)) + } + // commit record to version staging + staging.commit(rec) + } else { + err = errors.SetFd(err, fd) + if strict || !errors.IsCorrupted(err) { + return + } + s.logf("manifest error: %v (skipped)", errors.SetFd(err, fd)) + } + rec.resetCompPtrs() + rec.resetAddedTables() + rec.resetDeletedTables() + } + + switch { + case !rec.has(recComparer): + return newErrManifestCorrupted(fd, "comparer", "missing") + case rec.comparer != s.icmp.uName(): + return newErrManifestCorrupted(fd, "comparer", fmt.Sprintf("mismatch: want '%s', got '%s'", s.icmp.uName(), rec.comparer)) + case !rec.has(recNextFileNum): + return newErrManifestCorrupted(fd, "next-file-num", "missing") + case !rec.has(recJournalNum): + return newErrManifestCorrupted(fd, "journal-file-num", "missing") + case !rec.has(recSeqNum): + return newErrManifestCorrupted(fd, "seq-num", "missing") + } + + s.manifestFd = fd + s.setVersion(staging.finish()) + 
s.setNextFileNum(rec.nextFileNum) + s.recordCommited(rec) + return nil +} + +// Commit session; need external synchronization. +func (s *session) commit(r *sessionRecord) (err error) { + v := s.version() + defer v.release() + + // spawn new version based on current version + nv := v.spawn(r) + + if s.manifest == nil { + // manifest journal writer not yet created, create one + err = s.newManifest(r, nv) + } else { + err = s.flushManifest(r) + } + + // finally, apply new version if no error rise + if err == nil { + s.setVersion(nv) + } + + return +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/session_compaction.go b/vendor/github.com/syndtr/goleveldb/leveldb/session_compaction.go new file mode 100644 index 0000000000..089cd00b26 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/session_compaction.go @@ -0,0 +1,302 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "sync/atomic" + + "github.com/syndtr/goleveldb/leveldb/iterator" + "github.com/syndtr/goleveldb/leveldb/memdb" + "github.com/syndtr/goleveldb/leveldb/opt" +) + +func (s *session) pickMemdbLevel(umin, umax []byte, maxLevel int) int { + v := s.version() + defer v.release() + return v.pickMemdbLevel(umin, umax, maxLevel) +} + +func (s *session) flushMemdb(rec *sessionRecord, mdb *memdb.DB, maxLevel int) (int, error) { + // Create sorted table. + iter := mdb.NewIterator(nil) + defer iter.Release() + t, n, err := s.tops.createFrom(iter) + if err != nil { + return 0, err + } + + // Pick level other than zero can cause compaction issue with large + // bulk insert and delete on strictly incrementing key-space. The + // problem is that the small deletion markers trapped at lower level, + // while key/value entries keep growing at higher level. 
Since the + // key-space is strictly incrementing it will not overlaps with + // higher level, thus maximum possible level is always picked, while + // overlapping deletion marker pushed into lower level. + // See: https://github.com/syndtr/goleveldb/issues/127. + flushLevel := s.pickMemdbLevel(t.imin.ukey(), t.imax.ukey(), maxLevel) + rec.addTableFile(flushLevel, t) + + s.logf("memdb@flush created L%d@%d N·%d S·%s %q:%q", flushLevel, t.fd.Num, n, shortenb(int(t.size)), t.imin, t.imax) + return flushLevel, nil +} + +// Pick a compaction based on current state; need external synchronization. +func (s *session) pickCompaction() *compaction { + v := s.version() + + var sourceLevel int + var t0 tFiles + if v.cScore >= 1 { + sourceLevel = v.cLevel + cptr := s.getCompPtr(sourceLevel) + tables := v.levels[sourceLevel] + for _, t := range tables { + if cptr == nil || s.icmp.Compare(t.imax, cptr) > 0 { + t0 = append(t0, t) + break + } + } + if len(t0) == 0 { + t0 = append(t0, tables[0]) + } + } else { + if p := atomic.LoadPointer(&v.cSeek); p != nil { + ts := (*tSet)(p) + sourceLevel = ts.level + t0 = append(t0, ts.table) + } else { + v.release() + return nil + } + } + + return newCompaction(s, v, sourceLevel, t0) +} + +// Create compaction from given level and range; need external synchronization. +func (s *session) getCompactionRange(sourceLevel int, umin, umax []byte, noLimit bool) *compaction { + v := s.version() + + if sourceLevel >= len(v.levels) { + v.release() + return nil + } + + t0 := v.levels[sourceLevel].getOverlaps(nil, s.icmp, umin, umax, sourceLevel == 0) + if len(t0) == 0 { + v.release() + return nil + } + + // Avoid compacting too much in one shot in case the range is large. + // But we cannot do this for level-0 since level-0 files can overlap + // and we must not pick one file and drop another older file if the + // two files overlap. 
+ if !noLimit && sourceLevel > 0 { + limit := int64(v.s.o.GetCompactionSourceLimit(sourceLevel)) + total := int64(0) + for i, t := range t0 { + total += t.size + if total >= limit { + s.logf("table@compaction limiting F·%d -> F·%d", len(t0), i+1) + t0 = t0[:i+1] + break + } + } + } + + return newCompaction(s, v, sourceLevel, t0) +} + +func newCompaction(s *session, v *version, sourceLevel int, t0 tFiles) *compaction { + c := &compaction{ + s: s, + v: v, + sourceLevel: sourceLevel, + levels: [2]tFiles{t0, nil}, + maxGPOverlaps: int64(s.o.GetCompactionGPOverlaps(sourceLevel)), + tPtrs: make([]int, len(v.levels)), + } + c.expand() + c.save() + return c +} + +// compaction represent a compaction state. +type compaction struct { + s *session + v *version + + sourceLevel int + levels [2]tFiles + maxGPOverlaps int64 + + gp tFiles + gpi int + seenKey bool + gpOverlappedBytes int64 + imin, imax internalKey + tPtrs []int + released bool + + snapGPI int + snapSeenKey bool + snapGPOverlappedBytes int64 + snapTPtrs []int +} + +func (c *compaction) save() { + c.snapGPI = c.gpi + c.snapSeenKey = c.seenKey + c.snapGPOverlappedBytes = c.gpOverlappedBytes + c.snapTPtrs = append(c.snapTPtrs[:0], c.tPtrs...) +} + +func (c *compaction) restore() { + c.gpi = c.snapGPI + c.seenKey = c.snapSeenKey + c.gpOverlappedBytes = c.snapGPOverlappedBytes + c.tPtrs = append(c.tPtrs[:0], c.snapTPtrs...) +} + +func (c *compaction) release() { + if !c.released { + c.released = true + c.v.release() + } +} + +// Expand compacted tables; need external synchronization. +func (c *compaction) expand() { + limit := int64(c.s.o.GetCompactionExpandLimit(c.sourceLevel)) + vt0 := c.v.levels[c.sourceLevel] + vt1 := tFiles{} + if level := c.sourceLevel + 1; level < len(c.v.levels) { + vt1 = c.v.levels[level] + } + + t0, t1 := c.levels[0], c.levels[1] + imin, imax := t0.getRange(c.s.icmp) + // We expand t0 here just incase ukey hop across tables. 
+ t0 = vt0.getOverlaps(t0, c.s.icmp, imin.ukey(), imax.ukey(), c.sourceLevel == 0) + if len(t0) != len(c.levels[0]) { + imin, imax = t0.getRange(c.s.icmp) + } + t1 = vt1.getOverlaps(t1, c.s.icmp, imin.ukey(), imax.ukey(), false) + // Get entire range covered by compaction. + amin, amax := append(t0, t1...).getRange(c.s.icmp) + + // See if we can grow the number of inputs in "sourceLevel" without + // changing the number of "sourceLevel+1" files we pick up. + if len(t1) > 0 { + exp0 := vt0.getOverlaps(nil, c.s.icmp, amin.ukey(), amax.ukey(), c.sourceLevel == 0) + if len(exp0) > len(t0) && t1.size()+exp0.size() < limit { + xmin, xmax := exp0.getRange(c.s.icmp) + exp1 := vt1.getOverlaps(nil, c.s.icmp, xmin.ukey(), xmax.ukey(), false) + if len(exp1) == len(t1) { + c.s.logf("table@compaction expanding L%d+L%d (F·%d S·%s)+(F·%d S·%s) -> (F·%d S·%s)+(F·%d S·%s)", + c.sourceLevel, c.sourceLevel+1, len(t0), shortenb(int(t0.size())), len(t1), shortenb(int(t1.size())), + len(exp0), shortenb(int(exp0.size())), len(exp1), shortenb(int(exp1.size()))) + imin, imax = xmin, xmax + t0, t1 = exp0, exp1 + amin, amax = append(t0, t1...).getRange(c.s.icmp) + } + } + } + + // Compute the set of grandparent files that overlap this compaction + // (parent == sourceLevel+1; grandparent == sourceLevel+2) + if level := c.sourceLevel + 2; level < len(c.v.levels) { + c.gp = c.v.levels[level].getOverlaps(c.gp, c.s.icmp, amin.ukey(), amax.ukey(), false) + } + + c.levels[0], c.levels[1] = t0, t1 + c.imin, c.imax = imin, imax +} + +// Check whether compaction is trivial. 
+func (c *compaction) trivial() bool { + return len(c.levels[0]) == 1 && len(c.levels[1]) == 0 && c.gp.size() <= c.maxGPOverlaps +} + +func (c *compaction) baseLevelForKey(ukey []byte) bool { + for level := c.sourceLevel + 2; level < len(c.v.levels); level++ { + tables := c.v.levels[level] + for c.tPtrs[level] < len(tables) { + t := tables[c.tPtrs[level]] + if c.s.icmp.uCompare(ukey, t.imax.ukey()) <= 0 { + // We've advanced far enough. + if c.s.icmp.uCompare(ukey, t.imin.ukey()) >= 0 { + // Key falls in this file's range, so definitely not base level. + return false + } + break + } + c.tPtrs[level]++ + } + } + return true +} + +func (c *compaction) shouldStopBefore(ikey internalKey) bool { + for ; c.gpi < len(c.gp); c.gpi++ { + gp := c.gp[c.gpi] + if c.s.icmp.Compare(ikey, gp.imax) <= 0 { + break + } + if c.seenKey { + c.gpOverlappedBytes += gp.size + } + } + c.seenKey = true + + if c.gpOverlappedBytes > c.maxGPOverlaps { + // Too much overlap for current output; start new output. + c.gpOverlappedBytes = 0 + return true + } + return false +} + +// Creates an iterator. +func (c *compaction) newIterator() iterator.Iterator { + // Creates iterator slice. + icap := len(c.levels) + if c.sourceLevel == 0 { + // Special case for level-0. + icap = len(c.levels[0]) + 1 + } + its := make([]iterator.Iterator, 0, icap) + + // Options. + ro := &opt.ReadOptions{ + DontFillCache: true, + Strict: opt.StrictOverride, + } + strict := c.s.o.GetStrict(opt.StrictCompaction) + if strict { + ro.Strict |= opt.StrictReader + } + + for i, tables := range c.levels { + if len(tables) == 0 { + continue + } + + // Level-0 is not sorted and may overlaps each other. 
+ if c.sourceLevel+i == 0 { + for _, t := range tables { + its = append(its, c.s.tops.newIterator(t, nil, ro)) + } + } else { + it := iterator.NewIndexedIterator(tables.newIndexIterator(c.s.tops, c.s.icmp, nil, ro), strict) + its = append(its, it) + } + } + + return iterator.NewMergedIterator(its, c.s.icmp, strict) +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/session_record.go b/vendor/github.com/syndtr/goleveldb/leveldb/session_record.go new file mode 100644 index 0000000000..854e1aa6f9 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/session_record.go @@ -0,0 +1,323 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "bufio" + "encoding/binary" + "io" + "strings" + + "github.com/syndtr/goleveldb/leveldb/errors" + "github.com/syndtr/goleveldb/leveldb/storage" +) + +type byteReader interface { + io.Reader + io.ByteReader +} + +// These numbers are written to disk and should not be changed. 
+const ( + recComparer = 1 + recJournalNum = 2 + recNextFileNum = 3 + recSeqNum = 4 + recCompPtr = 5 + recDelTable = 6 + recAddTable = 7 + // 8 was used for large value refs + recPrevJournalNum = 9 +) + +type cpRecord struct { + level int + ikey internalKey +} + +type atRecord struct { + level int + num int64 + size int64 + imin internalKey + imax internalKey +} + +type dtRecord struct { + level int + num int64 +} + +type sessionRecord struct { + hasRec int + comparer string + journalNum int64 + prevJournalNum int64 + nextFileNum int64 + seqNum uint64 + compPtrs []cpRecord + addedTables []atRecord + deletedTables []dtRecord + + scratch [binary.MaxVarintLen64]byte + err error +} + +func (p *sessionRecord) has(rec int) bool { + return p.hasRec&(1<<uint(rec)) != 0 +} + +func (p *sessionRecord) setComparer(name string) { + p.hasRec |= 1 << recComparer + p.comparer = name +} + +func (p *sessionRecord) setJournalNum(num int64) { + p.hasRec |= 1 << recJournalNum + p.journalNum = num +} + +func (p *sessionRecord) setPrevJournalNum(num int64) { + p.hasRec |= 1 << recPrevJournalNum + p.prevJournalNum = num +} + +func (p *sessionRecord) setNextFileNum(num int64) { + p.hasRec |= 1 << recNextFileNum + p.nextFileNum = num +} + +func (p *sessionRecord) setSeqNum(num uint64) { + p.hasRec |= 1 << recSeqNum + p.seqNum = num +} + +func (p *sessionRecord) addCompPtr(level int, ikey internalKey) { + p.hasRec |= 1 << recCompPtr + p.compPtrs = append(p.compPtrs, cpRecord{level, ikey}) +} + +func (p *sessionRecord) resetCompPtrs() { + p.hasRec &= ^(1 << recCompPtr) + p.compPtrs = p.compPtrs[:0] +} + +func (p *sessionRecord) addTable(level int, num, size int64, imin, imax internalKey) { + p.hasRec |= 1 << recAddTable + p.addedTables = append(p.addedTables, atRecord{level, num, size, imin, imax}) +} + +func (p *sessionRecord) addTableFile(level int, t *tFile) { + p.addTable(level, t.fd.Num, t.size, t.imin, t.imax) +} + +func (p *sessionRecord) resetAddedTables() { + p.hasRec &= ^(1 << 
recAddTable) + p.addedTables = p.addedTables[:0] +} + +func (p *sessionRecord) delTable(level int, num int64) { + p.hasRec |= 1 << recDelTable + p.deletedTables = append(p.deletedTables, dtRecord{level, num}) +} + +func (p *sessionRecord) resetDeletedTables() { + p.hasRec &= ^(1 << recDelTable) + p.deletedTables = p.deletedTables[:0] +} + +func (p *sessionRecord) putUvarint(w io.Writer, x uint64) { + if p.err != nil { + return + } + n := binary.PutUvarint(p.scratch[:], x) + _, p.err = w.Write(p.scratch[:n]) +} + +func (p *sessionRecord) putVarint(w io.Writer, x int64) { + if x < 0 { + panic("invalid negative value") + } + p.putUvarint(w, uint64(x)) +} + +func (p *sessionRecord) putBytes(w io.Writer, x []byte) { + if p.err != nil { + return + } + p.putUvarint(w, uint64(len(x))) + if p.err != nil { + return + } + _, p.err = w.Write(x) +} + +func (p *sessionRecord) encode(w io.Writer) error { + p.err = nil + if p.has(recComparer) { + p.putUvarint(w, recComparer) + p.putBytes(w, []byte(p.comparer)) + } + if p.has(recJournalNum) { + p.putUvarint(w, recJournalNum) + p.putVarint(w, p.journalNum) + } + if p.has(recNextFileNum) { + p.putUvarint(w, recNextFileNum) + p.putVarint(w, p.nextFileNum) + } + if p.has(recSeqNum) { + p.putUvarint(w, recSeqNum) + p.putUvarint(w, p.seqNum) + } + for _, r := range p.compPtrs { + p.putUvarint(w, recCompPtr) + p.putUvarint(w, uint64(r.level)) + p.putBytes(w, r.ikey) + } + for _, r := range p.deletedTables { + p.putUvarint(w, recDelTable) + p.putUvarint(w, uint64(r.level)) + p.putVarint(w, r.num) + } + for _, r := range p.addedTables { + p.putUvarint(w, recAddTable) + p.putUvarint(w, uint64(r.level)) + p.putVarint(w, r.num) + p.putVarint(w, r.size) + p.putBytes(w, r.imin) + p.putBytes(w, r.imax) + } + return p.err +} + +func (p *sessionRecord) readUvarintMayEOF(field string, r io.ByteReader, mayEOF bool) uint64 { + if p.err != nil { + return 0 + } + x, err := binary.ReadUvarint(r) + if err != nil { + if err == io.ErrUnexpectedEOF || 
(mayEOF == false && err == io.EOF) { + p.err = errors.NewErrCorrupted(storage.FileDesc{}, &ErrManifestCorrupted{field, "short read"}) + } else if strings.HasPrefix(err.Error(), "binary:") { + p.err = errors.NewErrCorrupted(storage.FileDesc{}, &ErrManifestCorrupted{field, err.Error()}) + } else { + p.err = err + } + return 0 + } + return x +} + +func (p *sessionRecord) readUvarint(field string, r io.ByteReader) uint64 { + return p.readUvarintMayEOF(field, r, false) +} + +func (p *sessionRecord) readVarint(field string, r io.ByteReader) int64 { + x := int64(p.readUvarintMayEOF(field, r, false)) + if x < 0 { + p.err = errors.NewErrCorrupted(storage.FileDesc{}, &ErrManifestCorrupted{field, "invalid negative value"}) + } + return x +} + +func (p *sessionRecord) readBytes(field string, r byteReader) []byte { + if p.err != nil { + return nil + } + n := p.readUvarint(field, r) + if p.err != nil { + return nil + } + x := make([]byte, n) + _, p.err = io.ReadFull(r, x) + if p.err != nil { + if p.err == io.ErrUnexpectedEOF { + p.err = errors.NewErrCorrupted(storage.FileDesc{}, &ErrManifestCorrupted{field, "short read"}) + } + return nil + } + return x +} + +func (p *sessionRecord) readLevel(field string, r io.ByteReader) int { + if p.err != nil { + return 0 + } + x := p.readUvarint(field, r) + if p.err != nil { + return 0 + } + return int(x) +} + +func (p *sessionRecord) decode(r io.Reader) error { + br, ok := r.(byteReader) + if !ok { + br = bufio.NewReader(r) + } + p.err = nil + for p.err == nil { + rec := p.readUvarintMayEOF("field-header", br, true) + if p.err != nil { + if p.err == io.EOF { + return nil + } + return p.err + } + switch rec { + case recComparer: + x := p.readBytes("comparer", br) + if p.err == nil { + p.setComparer(string(x)) + } + case recJournalNum: + x := p.readVarint("journal-num", br) + if p.err == nil { + p.setJournalNum(x) + } + case recPrevJournalNum: + x := p.readVarint("prev-journal-num", br) + if p.err == nil { + p.setPrevJournalNum(x) + } + case 
recNextFileNum: + x := p.readVarint("next-file-num", br) + if p.err == nil { + p.setNextFileNum(x) + } + case recSeqNum: + x := p.readUvarint("seq-num", br) + if p.err == nil { + p.setSeqNum(x) + } + case recCompPtr: + level := p.readLevel("comp-ptr.level", br) + ikey := p.readBytes("comp-ptr.ikey", br) + if p.err == nil { + p.addCompPtr(level, internalKey(ikey)) + } + case recAddTable: + level := p.readLevel("add-table.level", br) + num := p.readVarint("add-table.num", br) + size := p.readVarint("add-table.size", br) + imin := p.readBytes("add-table.imin", br) + imax := p.readBytes("add-table.imax", br) + if p.err == nil { + p.addTable(level, num, size, imin, imax) + } + case recDelTable: + level := p.readLevel("del-table.level", br) + num := p.readVarint("del-table.num", br) + if p.err == nil { + p.delTable(level, num) + } + } + } + + return p.err +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/session_util.go b/vendor/github.com/syndtr/goleveldb/leveldb/session_util.go new file mode 100644 index 0000000000..40cb2cf957 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/session_util.go @@ -0,0 +1,271 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "fmt" + "sync/atomic" + + "github.com/syndtr/goleveldb/leveldb/journal" + "github.com/syndtr/goleveldb/leveldb/storage" +) + +// Logging. 
+ +type dropper struct { + s *session + fd storage.FileDesc +} + +func (d dropper) Drop(err error) { + if e, ok := err.(*journal.ErrCorrupted); ok { + d.s.logf("journal@drop %s-%d S·%s %q", d.fd.Type, d.fd.Num, shortenb(e.Size), e.Reason) + } else { + d.s.logf("journal@drop %s-%d %q", d.fd.Type, d.fd.Num, err) + } +} + +func (s *session) log(v ...interface{}) { s.stor.Log(fmt.Sprint(v...)) } +func (s *session) logf(format string, v ...interface{}) { s.stor.Log(fmt.Sprintf(format, v...)) } + +// File utils. + +func (s *session) newTemp() storage.FileDesc { + num := atomic.AddInt64(&s.stTempFileNum, 1) - 1 + return storage.FileDesc{Type: storage.TypeTemp, Num: num} +} + +func (s *session) addFileRef(fd storage.FileDesc, ref int) int { + ref += s.fileRef[fd.Num] + if ref > 0 { + s.fileRef[fd.Num] = ref + } else if ref == 0 { + delete(s.fileRef, fd.Num) + } else { + panic(fmt.Sprintf("negative ref: %v", fd)) + } + return ref +} + +// Session state. + +// Get current version. This will incr version ref, must call +// version.release (exactly once) after use. +func (s *session) version() *version { + s.vmu.Lock() + defer s.vmu.Unlock() + s.stVersion.incref() + return s.stVersion +} + +func (s *session) tLen(level int) int { + s.vmu.Lock() + defer s.vmu.Unlock() + return s.stVersion.tLen(level) +} + +// Set current version to v. +func (s *session) setVersion(v *version) { + s.vmu.Lock() + defer s.vmu.Unlock() + // Hold by session. It is important to call this first before releasing + // current version, otherwise the still used files might get released. + v.incref() + if s.stVersion != nil { + // Release current version. + s.stVersion.releaseNB() + } + s.stVersion = v +} + +// Get current unused file number. +func (s *session) nextFileNum() int64 { + return atomic.LoadInt64(&s.stNextFileNum) +} + +// Set current unused file number to num. +func (s *session) setNextFileNum(num int64) { + atomic.StoreInt64(&s.stNextFileNum, num) +} + +// Mark file number as used. 
+func (s *session) markFileNum(num int64) { + nextFileNum := num + 1 + for { + old, x := s.stNextFileNum, nextFileNum + if old > x { + x = old + } + if atomic.CompareAndSwapInt64(&s.stNextFileNum, old, x) { + break + } + } +} + +// Allocate a file number. +func (s *session) allocFileNum() int64 { + return atomic.AddInt64(&s.stNextFileNum, 1) - 1 +} + +// Reuse given file number. +func (s *session) reuseFileNum(num int64) { + for { + old, x := s.stNextFileNum, num + if old != x+1 { + x = old + } + if atomic.CompareAndSwapInt64(&s.stNextFileNum, old, x) { + break + } + } +} + +// Set compaction ptr at given level; need external synchronization. +func (s *session) setCompPtr(level int, ik internalKey) { + if level >= len(s.stCompPtrs) { + newCompPtrs := make([]internalKey, level+1) + copy(newCompPtrs, s.stCompPtrs) + s.stCompPtrs = newCompPtrs + } + s.stCompPtrs[level] = append(internalKey{}, ik...) +} + +// Get compaction ptr at given level; need external synchronization. +func (s *session) getCompPtr(level int) internalKey { + if level >= len(s.stCompPtrs) { + return nil + } + return s.stCompPtrs[level] +} + +// Manifest related utils. + +// Fill given session record obj with current states; need external +// synchronization. +func (s *session) fillRecord(r *sessionRecord, snapshot bool) { + r.setNextFileNum(s.nextFileNum()) + + if snapshot { + if !r.has(recJournalNum) { + r.setJournalNum(s.stJournalNum) + } + + if !r.has(recSeqNum) { + r.setSeqNum(s.stSeqNum) + } + + for level, ik := range s.stCompPtrs { + if ik != nil { + r.addCompPtr(level, ik) + } + } + + r.setComparer(s.icmp.uName()) + } +} + +// Mark if record has been committed, this will update session state; +// need external synchronization. 
+func (s *session) recordCommited(rec *sessionRecord) { + if rec.has(recJournalNum) { + s.stJournalNum = rec.journalNum + } + + if rec.has(recPrevJournalNum) { + s.stPrevJournalNum = rec.prevJournalNum + } + + if rec.has(recSeqNum) { + s.stSeqNum = rec.seqNum + } + + for _, r := range rec.compPtrs { + s.setCompPtr(r.level, internalKey(r.ikey)) + } +} + +// Create a new manifest file; need external synchronization. +func (s *session) newManifest(rec *sessionRecord, v *version) (err error) { + fd := storage.FileDesc{Type: storage.TypeManifest, Num: s.allocFileNum()} + writer, err := s.stor.Create(fd) + if err != nil { + return + } + jw := journal.NewWriter(writer) + + if v == nil { + v = s.version() + defer v.release() + } + if rec == nil { + rec = &sessionRecord{} + } + s.fillRecord(rec, true) + v.fillRecord(rec) + + defer func() { + if err == nil { + s.recordCommited(rec) + if s.manifest != nil { + s.manifest.Close() + } + if s.manifestWriter != nil { + s.manifestWriter.Close() + } + if !s.manifestFd.Zero() { + s.stor.Remove(s.manifestFd) + } + s.manifestFd = fd + s.manifestWriter = writer + s.manifest = jw + } else { + writer.Close() + s.stor.Remove(fd) + s.reuseFileNum(fd.Num) + } + }() + + w, err := jw.Next() + if err != nil { + return + } + err = rec.encode(w) + if err != nil { + return + } + err = jw.Flush() + if err != nil { + return + } + err = s.stor.SetMeta(fd) + return +} + +// Flush record to disk. 
+func (s *session) flushManifest(rec *sessionRecord) (err error) { + s.fillRecord(rec, false) + w, err := s.manifest.Next() + if err != nil { + return + } + err = rec.encode(w) + if err != nil { + return + } + err = s.manifest.Flush() + if err != nil { + return + } + if !s.o.GetNoSync() { + err = s.manifestWriter.Sync() + if err != nil { + return + } + } + s.recordCommited(rec) + return +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/storage.go b/vendor/github.com/syndtr/goleveldb/leveldb/storage.go new file mode 100644 index 0000000000..d45fb5dfeb --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/storage.go @@ -0,0 +1,63 @@ +package leveldb + +import ( + "github.com/syndtr/goleveldb/leveldb/storage" + "sync/atomic" +) + +type iStorage struct { + storage.Storage + read uint64 + write uint64 +} + +func (c *iStorage) Open(fd storage.FileDesc) (storage.Reader, error) { + r, err := c.Storage.Open(fd) + return &iStorageReader{r, c}, err +} + +func (c *iStorage) Create(fd storage.FileDesc) (storage.Writer, error) { + w, err := c.Storage.Create(fd) + return &iStorageWriter{w, c}, err +} + +func (c *iStorage) reads() uint64 { + return atomic.LoadUint64(&c.read) +} + +func (c *iStorage) writes() uint64 { + return atomic.LoadUint64(&c.write) +} + +// newIStorage returns the given storage wrapped by iStorage. 
+func newIStorage(s storage.Storage) *iStorage { + return &iStorage{s, 0, 0} +} + +type iStorageReader struct { + storage.Reader + c *iStorage +} + +func (r *iStorageReader) Read(p []byte) (n int, err error) { + n, err = r.Reader.Read(p) + atomic.AddUint64(&r.c.read, uint64(n)) + return n, err +} + +func (r *iStorageReader) ReadAt(p []byte, off int64) (n int, err error) { + n, err = r.Reader.ReadAt(p, off) + atomic.AddUint64(&r.c.read, uint64(n)) + return n, err +} + +type iStorageWriter struct { + storage.Writer + c *iStorage +} + +func (w *iStorageWriter) Write(p []byte) (n int, err error) { + n, err = w.Writer.Write(p) + atomic.AddUint64(&w.c.write, uint64(n)) + return n, err +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage.go b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage.go new file mode 100644 index 0000000000..9ba71fd6d1 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage.go @@ -0,0 +1,671 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reservefs. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +package storage + +import ( + "errors" + "fmt" + "io" + "io/ioutil" + "os" + "path/filepath" + "runtime" + "sort" + "strconv" + "strings" + "sync" + "time" +) + +var ( + errFileOpen = errors.New("leveldb/storage: file still open") + errReadOnly = errors.New("leveldb/storage: storage is read-only") +) + +type fileLock interface { + release() error +} + +type fileStorageLock struct { + fs *fileStorage +} + +func (lock *fileStorageLock) Unlock() { + if lock.fs != nil { + lock.fs.mu.Lock() + defer lock.fs.mu.Unlock() + if lock.fs.slock == lock { + lock.fs.slock = nil + } + } +} + +type int64Slice []int64 + +func (p int64Slice) Len() int { return len(p) } +func (p int64Slice) Less(i, j int) bool { return p[i] < p[j] } +func (p int64Slice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } + +func writeFileSynced(filename string, data []byte, perm os.FileMode) error { + f, err := os.OpenFile(filename, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, perm) + if err != nil { + return err + } + n, err := f.Write(data) + if err == nil && n < len(data) { + err = io.ErrShortWrite + } + if err1 := f.Sync(); err == nil { + err = err1 + } + if err1 := f.Close(); err == nil { + err = err1 + } + return err +} + +const logSizeThreshold = 1024 * 1024 // 1 MiB + +// fileStorage is a file-system backed storage. +type fileStorage struct { + path string + readOnly bool + + mu sync.Mutex + flock fileLock + slock *fileStorageLock + logw *os.File + logSize int64 + buf []byte + // Opened file counter; if open < 0 means closed. + open int + day int +} + +// OpenFile returns a new filesystem-backed storage implementation with the given +// path. This also acquire a file lock, so any subsequent attempt to open the +// same path will fail. +// +// The storage must be closed after use, by calling Close method. 
+func OpenFile(path string, readOnly bool) (Storage, error) { + if fi, err := os.Stat(path); err == nil { + if !fi.IsDir() { + return nil, fmt.Errorf("leveldb/storage: open %s: not a directory", path) + } + } else if os.IsNotExist(err) && !readOnly { + if err := os.MkdirAll(path, 0755); err != nil { + return nil, err + } + } else { + return nil, err + } + + flock, err := newFileLock(filepath.Join(path, "LOCK"), readOnly) + if err != nil { + return nil, err + } + + defer func() { + if err != nil { + flock.release() + } + }() + + var ( + logw *os.File + logSize int64 + ) + if !readOnly { + logw, err = os.OpenFile(filepath.Join(path, "LOG"), os.O_WRONLY|os.O_CREATE, 0644) + if err != nil { + return nil, err + } + logSize, err = logw.Seek(0, os.SEEK_END) + if err != nil { + logw.Close() + return nil, err + } + } + + fs := &fileStorage{ + path: path, + readOnly: readOnly, + flock: flock, + logw: logw, + logSize: logSize, + } + runtime.SetFinalizer(fs, (*fileStorage).Close) + return fs, nil +} + +func (fs *fileStorage) Lock() (Locker, error) { + fs.mu.Lock() + defer fs.mu.Unlock() + if fs.open < 0 { + return nil, ErrClosed + } + if fs.readOnly { + return &fileStorageLock{}, nil + } + if fs.slock != nil { + return nil, ErrLocked + } + fs.slock = &fileStorageLock{fs: fs} + return fs.slock, nil +} + +func itoa(buf []byte, i int, wid int) []byte { + u := uint(i) + if u == 0 && wid <= 1 { + return append(buf, '0') + } + + // Assemble decimal in reverse order. + var b [32]byte + bp := len(b) + for ; u > 0 || wid > 0; u /= 10 { + bp-- + wid-- + b[bp] = byte(u%10) + '0' + } + return append(buf, b[bp:]...) +} + +func (fs *fileStorage) printDay(t time.Time) { + if fs.day == t.Day() { + return + } + fs.day = t.Day() + fs.logw.Write([]byte("=============== " + t.Format("Jan 2, 2006 (MST)") + " ===============\n")) +} + +func (fs *fileStorage) doLog(t time.Time, str string) { + if fs.logSize > logSizeThreshold { + // Rotate log file. 
+ fs.logw.Close() + fs.logw = nil + fs.logSize = 0 + rename(filepath.Join(fs.path, "LOG"), filepath.Join(fs.path, "LOG.old")) + } + if fs.logw == nil { + var err error + fs.logw, err = os.OpenFile(filepath.Join(fs.path, "LOG"), os.O_WRONLY|os.O_CREATE, 0644) + if err != nil { + return + } + // Force printDay on new log file. + fs.day = 0 + } + fs.printDay(t) + hour, min, sec := t.Clock() + msec := t.Nanosecond() / 1e3 + // time + fs.buf = itoa(fs.buf[:0], hour, 2) + fs.buf = append(fs.buf, ':') + fs.buf = itoa(fs.buf, min, 2) + fs.buf = append(fs.buf, ':') + fs.buf = itoa(fs.buf, sec, 2) + fs.buf = append(fs.buf, '.') + fs.buf = itoa(fs.buf, msec, 6) + fs.buf = append(fs.buf, ' ') + // write + fs.buf = append(fs.buf, []byte(str)...) + fs.buf = append(fs.buf, '\n') + n, _ := fs.logw.Write(fs.buf) + fs.logSize += int64(n) +} + +func (fs *fileStorage) Log(str string) { + if !fs.readOnly { + t := time.Now() + fs.mu.Lock() + defer fs.mu.Unlock() + if fs.open < 0 { + return + } + fs.doLog(t, str) + } +} + +func (fs *fileStorage) log(str string) { + if !fs.readOnly { + fs.doLog(time.Now(), str) + } +} + +func (fs *fileStorage) setMeta(fd FileDesc) error { + content := fsGenName(fd) + "\n" + // Check and backup old CURRENT file. + currentPath := filepath.Join(fs.path, "CURRENT") + if _, err := os.Stat(currentPath); err == nil { + b, err := ioutil.ReadFile(currentPath) + if err != nil { + fs.log(fmt.Sprintf("backup CURRENT: %v", err)) + return err + } + if string(b) == content { + // Content not changed, do nothing. + return nil + } + if err := writeFileSynced(currentPath+".bak", b, 0644); err != nil { + fs.log(fmt.Sprintf("backup CURRENT: %v", err)) + return err + } + } else if !os.IsNotExist(err) { + return err + } + path := fmt.Sprintf("%s.%d", filepath.Join(fs.path, "CURRENT"), fd.Num) + if err := writeFileSynced(path, []byte(content), 0644); err != nil { + fs.log(fmt.Sprintf("create CURRENT.%d: %v", fd.Num, err)) + return err + } + // Replace CURRENT file. 
+ if err := rename(path, currentPath); err != nil { + fs.log(fmt.Sprintf("rename CURRENT.%d: %v", fd.Num, err)) + return err + } + // Sync root directory. + if err := syncDir(fs.path); err != nil { + fs.log(fmt.Sprintf("syncDir: %v", err)) + return err + } + return nil +} + +func (fs *fileStorage) SetMeta(fd FileDesc) error { + if !FileDescOk(fd) { + return ErrInvalidFile + } + if fs.readOnly { + return errReadOnly + } + + fs.mu.Lock() + defer fs.mu.Unlock() + if fs.open < 0 { + return ErrClosed + } + return fs.setMeta(fd) +} + +func (fs *fileStorage) GetMeta() (FileDesc, error) { + fs.mu.Lock() + defer fs.mu.Unlock() + if fs.open < 0 { + return FileDesc{}, ErrClosed + } + dir, err := os.Open(fs.path) + if err != nil { + return FileDesc{}, err + } + names, err := dir.Readdirnames(0) + // Close the dir first before checking for Readdirnames error. + if ce := dir.Close(); ce != nil { + fs.log(fmt.Sprintf("close dir: %v", ce)) + } + if err != nil { + return FileDesc{}, err + } + // Try this in order: + // - CURRENT.[0-9]+ ('pending rename' file, descending order) + // - CURRENT + // - CURRENT.bak + // + // Skip corrupted file or file that point to a missing target file. 
+ type currentFile struct { + name string + fd FileDesc + } + tryCurrent := func(name string) (*currentFile, error) { + b, err := ioutil.ReadFile(filepath.Join(fs.path, name)) + if err != nil { + if os.IsNotExist(err) { + err = os.ErrNotExist + } + return nil, err + } + var fd FileDesc + if len(b) < 1 || b[len(b)-1] != '\n' || !fsParseNamePtr(string(b[:len(b)-1]), &fd) { + fs.log(fmt.Sprintf("%s: corrupted content: %q", name, b)) + err := &ErrCorrupted{ + Err: errors.New("leveldb/storage: corrupted or incomplete CURRENT file"), + } + return nil, err + } + if _, err := os.Stat(filepath.Join(fs.path, fsGenName(fd))); err != nil { + if os.IsNotExist(err) { + fs.log(fmt.Sprintf("%s: missing target file: %s", name, fd)) + err = os.ErrNotExist + } + return nil, err + } + return ¤tFile{name: name, fd: fd}, nil + } + tryCurrents := func(names []string) (*currentFile, error) { + var ( + cur *currentFile + // Last corruption error. + lastCerr error + ) + for _, name := range names { + var err error + cur, err = tryCurrent(name) + if err == nil { + break + } else if err == os.ErrNotExist { + // Fallback to the next file. + } else if isCorrupted(err) { + lastCerr = err + // Fallback to the next file. + } else { + // In case the error is due to permission, etc. + return nil, err + } + } + if cur == nil { + err := os.ErrNotExist + if lastCerr != nil { + err = lastCerr + } + return nil, err + } + return cur, nil + } + + // Try 'pending rename' files. 
+ var nums []int64 + for _, name := range names { + if strings.HasPrefix(name, "CURRENT.") && name != "CURRENT.bak" { + i, err := strconv.ParseInt(name[8:], 10, 64) + if err == nil { + nums = append(nums, i) + } + } + } + var ( + pendCur *currentFile + pendErr = os.ErrNotExist + pendNames []string + ) + if len(nums) > 0 { + sort.Sort(sort.Reverse(int64Slice(nums))) + pendNames = make([]string, len(nums)) + for i, num := range nums { + pendNames[i] = fmt.Sprintf("CURRENT.%d", num) + } + pendCur, pendErr = tryCurrents(pendNames) + if pendErr != nil && pendErr != os.ErrNotExist && !isCorrupted(pendErr) { + return FileDesc{}, pendErr + } + } + + // Try CURRENT and CURRENT.bak. + curCur, curErr := tryCurrents([]string{"CURRENT", "CURRENT.bak"}) + if curErr != nil && curErr != os.ErrNotExist && !isCorrupted(curErr) { + return FileDesc{}, curErr + } + + // pendCur takes precedence, but guards against obsolete pendCur. + if pendCur != nil && (curCur == nil || pendCur.fd.Num > curCur.fd.Num) { + curCur = pendCur + } + + if curCur != nil { + // Restore CURRENT file to proper state. + if !fs.readOnly && (curCur.name != "CURRENT" || len(pendNames) != 0) { + // Ignore setMeta errors, however don't delete obsolete files if we + // catch error. + if err := fs.setMeta(curCur.fd); err == nil { + // Remove 'pending rename' files. + for _, name := range pendNames { + if err := os.Remove(filepath.Join(fs.path, name)); err != nil { + fs.log(fmt.Sprintf("remove %s: %v", name, err)) + } + } + } + } + return curCur.fd, nil + } + + // Nothing found. + if isCorrupted(pendErr) { + return FileDesc{}, pendErr + } + return FileDesc{}, curErr +} + +func (fs *fileStorage) List(ft FileType) (fds []FileDesc, err error) { + fs.mu.Lock() + defer fs.mu.Unlock() + if fs.open < 0 { + return nil, ErrClosed + } + dir, err := os.Open(fs.path) + if err != nil { + return + } + names, err := dir.Readdirnames(0) + // Close the dir first before checking for Readdirnames error. 
+ if cerr := dir.Close(); cerr != nil { + fs.log(fmt.Sprintf("close dir: %v", cerr)) + } + if err == nil { + for _, name := range names { + if fd, ok := fsParseName(name); ok && fd.Type&ft != 0 { + fds = append(fds, fd) + } + } + } + return +} + +func (fs *fileStorage) Open(fd FileDesc) (Reader, error) { + if !FileDescOk(fd) { + return nil, ErrInvalidFile + } + + fs.mu.Lock() + defer fs.mu.Unlock() + if fs.open < 0 { + return nil, ErrClosed + } + of, err := os.OpenFile(filepath.Join(fs.path, fsGenName(fd)), os.O_RDONLY, 0) + if err != nil { + if fsHasOldName(fd) && os.IsNotExist(err) { + of, err = os.OpenFile(filepath.Join(fs.path, fsGenOldName(fd)), os.O_RDONLY, 0) + if err == nil { + goto ok + } + } + return nil, err + } +ok: + fs.open++ + return &fileWrap{File: of, fs: fs, fd: fd}, nil +} + +func (fs *fileStorage) Create(fd FileDesc) (Writer, error) { + if !FileDescOk(fd) { + return nil, ErrInvalidFile + } + if fs.readOnly { + return nil, errReadOnly + } + + fs.mu.Lock() + defer fs.mu.Unlock() + if fs.open < 0 { + return nil, ErrClosed + } + of, err := os.OpenFile(filepath.Join(fs.path, fsGenName(fd)), os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) + if err != nil { + return nil, err + } + fs.open++ + return &fileWrap{File: of, fs: fs, fd: fd}, nil +} + +func (fs *fileStorage) Remove(fd FileDesc) error { + if !FileDescOk(fd) { + return ErrInvalidFile + } + if fs.readOnly { + return errReadOnly + } + + fs.mu.Lock() + defer fs.mu.Unlock() + if fs.open < 0 { + return ErrClosed + } + err := os.Remove(filepath.Join(fs.path, fsGenName(fd))) + if err != nil { + if fsHasOldName(fd) && os.IsNotExist(err) { + if e1 := os.Remove(filepath.Join(fs.path, fsGenOldName(fd))); !os.IsNotExist(e1) { + fs.log(fmt.Sprintf("remove %s: %v (old name)", fd, err)) + err = e1 + } + } else { + fs.log(fmt.Sprintf("remove %s: %v", fd, err)) + } + } + return err +} + +func (fs *fileStorage) Rename(oldfd, newfd FileDesc) error { + if !FileDescOk(oldfd) || !FileDescOk(newfd) { + return 
ErrInvalidFile + } + if oldfd == newfd { + return nil + } + if fs.readOnly { + return errReadOnly + } + + fs.mu.Lock() + defer fs.mu.Unlock() + if fs.open < 0 { + return ErrClosed + } + return rename(filepath.Join(fs.path, fsGenName(oldfd)), filepath.Join(fs.path, fsGenName(newfd))) +} + +func (fs *fileStorage) Close() error { + fs.mu.Lock() + defer fs.mu.Unlock() + if fs.open < 0 { + return ErrClosed + } + // Clear the finalizer. + runtime.SetFinalizer(fs, nil) + + if fs.open > 0 { + fs.log(fmt.Sprintf("close: warning, %d files still open", fs.open)) + } + fs.open = -1 + if fs.logw != nil { + fs.logw.Close() + } + return fs.flock.release() +} + +type fileWrap struct { + *os.File + fs *fileStorage + fd FileDesc + closed bool +} + +func (fw *fileWrap) Sync() error { + if err := fw.File.Sync(); err != nil { + return err + } + if fw.fd.Type == TypeManifest { + // Also sync parent directory if file type is manifest. + // See: https://code.google.com/p/leveldb/issues/detail?id=190. + if err := syncDir(fw.fs.path); err != nil { + fw.fs.log(fmt.Sprintf("syncDir: %v", err)) + return err + } + } + return nil +} + +func (fw *fileWrap) Close() error { + fw.fs.mu.Lock() + defer fw.fs.mu.Unlock() + if fw.closed { + return ErrClosed + } + fw.closed = true + fw.fs.open-- + err := fw.File.Close() + if err != nil { + fw.fs.log(fmt.Sprintf("close %s: %v", fw.fd, err)) + } + return err +} + +func fsGenName(fd FileDesc) string { + switch fd.Type { + case TypeManifest: + return fmt.Sprintf("MANIFEST-%06d", fd.Num) + case TypeJournal: + return fmt.Sprintf("%06d.log", fd.Num) + case TypeTable: + return fmt.Sprintf("%06d.ldb", fd.Num) + case TypeTemp: + return fmt.Sprintf("%06d.tmp", fd.Num) + default: + panic("invalid file type") + } +} + +func fsHasOldName(fd FileDesc) bool { + return fd.Type == TypeTable +} + +func fsGenOldName(fd FileDesc) string { + switch fd.Type { + case TypeTable: + return fmt.Sprintf("%06d.sst", fd.Num) + } + return fsGenName(fd) +} + +func fsParseName(name 
string) (fd FileDesc, ok bool) { + var tail string + _, err := fmt.Sscanf(name, "%d.%s", &fd.Num, &tail) + if err == nil { + switch tail { + case "log": + fd.Type = TypeJournal + case "ldb", "sst": + fd.Type = TypeTable + case "tmp": + fd.Type = TypeTemp + default: + return + } + return fd, true + } + n, _ := fmt.Sscanf(name, "MANIFEST-%d%s", &fd.Num, &tail) + if n == 1 { + fd.Type = TypeManifest + return fd, true + } + return +} + +func fsParseNamePtr(name string, fd *FileDesc) bool { + _fd, ok := fsParseName(name) + if fd != nil { + *fd = _fd + } + return ok +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_nacl.go b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_nacl.go new file mode 100644 index 0000000000..5545aeef2a --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_nacl.go @@ -0,0 +1,34 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// +build nacl + +package storage + +import ( + "os" + "syscall" +) + +func newFileLock(path string, readOnly bool) (fl fileLock, err error) { + return nil, syscall.ENOTSUP +} + +func setFileLock(f *os.File, readOnly, lock bool) error { + return syscall.ENOTSUP +} + +func rename(oldpath, newpath string) error { + return syscall.ENOTSUP +} + +func isErrInvalid(err error) bool { + return false +} + +func syncDir(name string) error { + return syscall.ENOTSUP +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_plan9.go b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_plan9.go new file mode 100644 index 0000000000..b829798012 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_plan9.go @@ -0,0 +1,63 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. 
+// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package storage + +import ( + "os" +) + +type plan9FileLock struct { + f *os.File +} + +func (fl *plan9FileLock) release() error { + return fl.f.Close() +} + +func newFileLock(path string, readOnly bool) (fl fileLock, err error) { + var ( + flag int + perm os.FileMode + ) + if readOnly { + flag = os.O_RDONLY + } else { + flag = os.O_RDWR + perm = os.ModeExclusive + } + f, err := os.OpenFile(path, flag, perm) + if os.IsNotExist(err) { + f, err = os.OpenFile(path, flag|os.O_CREATE, perm|0644) + } + if err != nil { + return + } + fl = &plan9FileLock{f: f} + return +} + +func rename(oldpath, newpath string) error { + if _, err := os.Stat(newpath); err == nil { + if err := os.Remove(newpath); err != nil { + return err + } + } + + return os.Rename(oldpath, newpath) +} + +func syncDir(name string) error { + f, err := os.Open(name) + if err != nil { + return err + } + defer f.Close() + if err := f.Sync(); err != nil { + return err + } + return nil +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_solaris.go b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_solaris.go new file mode 100644 index 0000000000..79901ee4a7 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_solaris.go @@ -0,0 +1,81 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +// +build solaris + +package storage + +import ( + "os" + "syscall" +) + +type unixFileLock struct { + f *os.File +} + +func (fl *unixFileLock) release() error { + if err := setFileLock(fl.f, false, false); err != nil { + return err + } + return fl.f.Close() +} + +func newFileLock(path string, readOnly bool) (fl fileLock, err error) { + var flag int + if readOnly { + flag = os.O_RDONLY + } else { + flag = os.O_RDWR + } + f, err := os.OpenFile(path, flag, 0) + if os.IsNotExist(err) { + f, err = os.OpenFile(path, flag|os.O_CREATE, 0644) + } + if err != nil { + return + } + err = setFileLock(f, readOnly, true) + if err != nil { + f.Close() + return + } + fl = &unixFileLock{f: f} + return +} + +func setFileLock(f *os.File, readOnly, lock bool) error { + flock := syscall.Flock_t{ + Type: syscall.F_UNLCK, + Start: 0, + Len: 0, + Whence: 1, + } + if lock { + if readOnly { + flock.Type = syscall.F_RDLCK + } else { + flock.Type = syscall.F_WRLCK + } + } + return syscall.FcntlFlock(f.Fd(), syscall.F_SETLK, &flock) +} + +func rename(oldpath, newpath string) error { + return os.Rename(oldpath, newpath) +} + +func syncDir(name string) error { + f, err := os.Open(name) + if err != nil { + return err + } + defer f.Close() + if err := f.Sync(); err != nil { + return err + } + return nil +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_unix.go b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_unix.go new file mode 100644 index 0000000000..d75f66a9ef --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_unix.go @@ -0,0 +1,98 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +// +build darwin dragonfly freebsd linux netbsd openbsd + +package storage + +import ( + "os" + "syscall" +) + +type unixFileLock struct { + f *os.File +} + +func (fl *unixFileLock) release() error { + if err := setFileLock(fl.f, false, false); err != nil { + return err + } + return fl.f.Close() +} + +func newFileLock(path string, readOnly bool) (fl fileLock, err error) { + var flag int + if readOnly { + flag = os.O_RDONLY + } else { + flag = os.O_RDWR + } + f, err := os.OpenFile(path, flag, 0) + if os.IsNotExist(err) { + f, err = os.OpenFile(path, flag|os.O_CREATE, 0644) + } + if err != nil { + return + } + err = setFileLock(f, readOnly, true) + if err != nil { + f.Close() + return + } + fl = &unixFileLock{f: f} + return +} + +func setFileLock(f *os.File, readOnly, lock bool) error { + how := syscall.LOCK_UN + if lock { + if readOnly { + how = syscall.LOCK_SH + } else { + how = syscall.LOCK_EX + } + } + return syscall.Flock(int(f.Fd()), how|syscall.LOCK_NB) +} + +func rename(oldpath, newpath string) error { + return os.Rename(oldpath, newpath) +} + +func isErrInvalid(err error) bool { + if err == os.ErrInvalid { + return true + } + // Go < 1.8 + if syserr, ok := err.(*os.SyscallError); ok && syserr.Err == syscall.EINVAL { + return true + } + // Go >= 1.8 returns *os.PathError instead + if patherr, ok := err.(*os.PathError); ok && patherr.Err == syscall.EINVAL { + return true + } + return false +} + +func syncDir(name string) error { + // As per fsync manpage, Linux seems to expect fsync on directory, however + // some system don't support this, so we will ignore syscall.EINVAL. + // + // From fsync(2): + // Calling fsync() does not necessarily ensure that the entry in the + // directory containing the file has also reached disk. For that an + // explicit fsync() on a file descriptor for the directory is also needed. 
+ f, err := os.Open(name) + if err != nil { + return err + } + defer f.Close() + if err := f.Sync(); err != nil && !isErrInvalid(err) { + return err + } + return nil +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_windows.go b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_windows.go new file mode 100644 index 0000000000..899335fd7e --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_windows.go @@ -0,0 +1,78 @@ +// Copyright (c) 2013, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package storage + +import ( + "syscall" + "unsafe" +) + +var ( + modkernel32 = syscall.NewLazyDLL("kernel32.dll") + + procMoveFileExW = modkernel32.NewProc("MoveFileExW") +) + +const ( + _MOVEFILE_REPLACE_EXISTING = 1 +) + +type windowsFileLock struct { + fd syscall.Handle +} + +func (fl *windowsFileLock) release() error { + return syscall.Close(fl.fd) +} + +func newFileLock(path string, readOnly bool) (fl fileLock, err error) { + pathp, err := syscall.UTF16PtrFromString(path) + if err != nil { + return + } + var access, shareMode uint32 + if readOnly { + access = syscall.GENERIC_READ + shareMode = syscall.FILE_SHARE_READ + } else { + access = syscall.GENERIC_READ | syscall.GENERIC_WRITE + } + fd, err := syscall.CreateFile(pathp, access, shareMode, nil, syscall.OPEN_EXISTING, syscall.FILE_ATTRIBUTE_NORMAL, 0) + if err == syscall.ERROR_FILE_NOT_FOUND { + fd, err = syscall.CreateFile(pathp, access, shareMode, nil, syscall.OPEN_ALWAYS, syscall.FILE_ATTRIBUTE_NORMAL, 0) + } + if err != nil { + return + } + fl = &windowsFileLock{fd: fd} + return +} + +func moveFileEx(from *uint16, to *uint16, flags uint32) error { + r1, _, e1 := syscall.Syscall(procMoveFileExW.Addr(), 3, uintptr(unsafe.Pointer(from)), uintptr(unsafe.Pointer(to)), uintptr(flags)) + if r1 == 0 { + if e1 != 0 { + return 
error(e1) + } + return syscall.EINVAL + } + return nil +} + +func rename(oldpath, newpath string) error { + from, err := syscall.UTF16PtrFromString(oldpath) + if err != nil { + return err + } + to, err := syscall.UTF16PtrFromString(newpath) + if err != nil { + return err + } + return moveFileEx(from, to, _MOVEFILE_REPLACE_EXISTING) +} + +func syncDir(name string) error { return nil } diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/storage/mem_storage.go b/vendor/github.com/syndtr/goleveldb/leveldb/storage/mem_storage.go new file mode 100644 index 0000000000..838f1bee1b --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/storage/mem_storage.go @@ -0,0 +1,222 @@ +// Copyright (c) 2013, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package storage + +import ( + "bytes" + "os" + "sync" +) + +const typeShift = 4 + +// Verify at compile-time that typeShift is large enough to cover all FileType +// values by confirming that 0 == 0. +var _ [0]struct{} = [TypeAll >> typeShift]struct{}{} + +type memStorageLock struct { + ms *memStorage +} + +func (lock *memStorageLock) Unlock() { + ms := lock.ms + ms.mu.Lock() + defer ms.mu.Unlock() + if ms.slock == lock { + ms.slock = nil + } + return +} + +// memStorage is a memory-backed storage. +type memStorage struct { + mu sync.Mutex + slock *memStorageLock + files map[uint64]*memFile + meta FileDesc +} + +// NewMemStorage returns a new memory-backed storage implementation. 
+func NewMemStorage() Storage { + return &memStorage{ + files: make(map[uint64]*memFile), + } +} + +func (ms *memStorage) Lock() (Locker, error) { + ms.mu.Lock() + defer ms.mu.Unlock() + if ms.slock != nil { + return nil, ErrLocked + } + ms.slock = &memStorageLock{ms: ms} + return ms.slock, nil +} + +func (*memStorage) Log(str string) {} + +func (ms *memStorage) SetMeta(fd FileDesc) error { + if !FileDescOk(fd) { + return ErrInvalidFile + } + + ms.mu.Lock() + ms.meta = fd + ms.mu.Unlock() + return nil +} + +func (ms *memStorage) GetMeta() (FileDesc, error) { + ms.mu.Lock() + defer ms.mu.Unlock() + if ms.meta.Zero() { + return FileDesc{}, os.ErrNotExist + } + return ms.meta, nil +} + +func (ms *memStorage) List(ft FileType) ([]FileDesc, error) { + ms.mu.Lock() + var fds []FileDesc + for x := range ms.files { + fd := unpackFile(x) + if fd.Type&ft != 0 { + fds = append(fds, fd) + } + } + ms.mu.Unlock() + return fds, nil +} + +func (ms *memStorage) Open(fd FileDesc) (Reader, error) { + if !FileDescOk(fd) { + return nil, ErrInvalidFile + } + + ms.mu.Lock() + defer ms.mu.Unlock() + if m, exist := ms.files[packFile(fd)]; exist { + if m.open { + return nil, errFileOpen + } + m.open = true + return &memReader{Reader: bytes.NewReader(m.Bytes()), ms: ms, m: m}, nil + } + return nil, os.ErrNotExist +} + +func (ms *memStorage) Create(fd FileDesc) (Writer, error) { + if !FileDescOk(fd) { + return nil, ErrInvalidFile + } + + x := packFile(fd) + ms.mu.Lock() + defer ms.mu.Unlock() + m, exist := ms.files[x] + if exist { + if m.open { + return nil, errFileOpen + } + m.Reset() + } else { + m = &memFile{} + ms.files[x] = m + } + m.open = true + return &memWriter{memFile: m, ms: ms}, nil +} + +func (ms *memStorage) Remove(fd FileDesc) error { + if !FileDescOk(fd) { + return ErrInvalidFile + } + + x := packFile(fd) + ms.mu.Lock() + defer ms.mu.Unlock() + if _, exist := ms.files[x]; exist { + delete(ms.files, x) + return nil + } + return os.ErrNotExist +} + +func (ms *memStorage) 
Rename(oldfd, newfd FileDesc) error { + if !FileDescOk(oldfd) || !FileDescOk(newfd) { + return ErrInvalidFile + } + if oldfd == newfd { + return nil + } + + oldx := packFile(oldfd) + newx := packFile(newfd) + ms.mu.Lock() + defer ms.mu.Unlock() + oldm, exist := ms.files[oldx] + if !exist { + return os.ErrNotExist + } + newm, exist := ms.files[newx] + if (exist && newm.open) || oldm.open { + return errFileOpen + } + delete(ms.files, oldx) + ms.files[newx] = oldm + return nil +} + +func (*memStorage) Close() error { return nil } + +type memFile struct { + bytes.Buffer + open bool +} + +type memReader struct { + *bytes.Reader + ms *memStorage + m *memFile + closed bool +} + +func (mr *memReader) Close() error { + mr.ms.mu.Lock() + defer mr.ms.mu.Unlock() + if mr.closed { + return ErrClosed + } + mr.m.open = false + return nil +} + +type memWriter struct { + *memFile + ms *memStorage + closed bool +} + +func (*memWriter) Sync() error { return nil } + +func (mw *memWriter) Close() error { + mw.ms.mu.Lock() + defer mw.ms.mu.Unlock() + if mw.closed { + return ErrClosed + } + mw.memFile.open = false + return nil +} + +func packFile(fd FileDesc) uint64 { + return uint64(fd.Num)<<typeShift | uint64(fd.Type) +} + +func unpackFile(x uint64) FileDesc { + return FileDesc{FileType(x) & TypeAll, int64(x >> typeShift)} +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/storage/storage.go b/vendor/github.com/syndtr/goleveldb/leveldb/storage/storage.go new file mode 100644 index 0000000000..4e4a724258 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/storage/storage.go @@ -0,0 +1,187 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Package storage provides storage abstraction for LevelDB. +package storage + +import ( + "errors" + "fmt" + "io" +) + +// FileType represent a file type. 
+type FileType int + +// File types. +const ( + TypeManifest FileType = 1 << iota + TypeJournal + TypeTable + TypeTemp + + TypeAll = TypeManifest | TypeJournal | TypeTable | TypeTemp +) + +func (t FileType) String() string { + switch t { + case TypeManifest: + return "manifest" + case TypeJournal: + return "journal" + case TypeTable: + return "table" + case TypeTemp: + return "temp" + } + return fmt.Sprintf("<unknown:%d>", t) +} + +// Common error. +var ( + ErrInvalidFile = errors.New("leveldb/storage: invalid file for argument") + ErrLocked = errors.New("leveldb/storage: already locked") + ErrClosed = errors.New("leveldb/storage: closed") +) + +// ErrCorrupted is the type that wraps errors that indicate corruption of +// a file. Package storage has its own type instead of using +// errors.ErrCorrupted to prevent circular import. +type ErrCorrupted struct { + Fd FileDesc + Err error +} + +func isCorrupted(err error) bool { + switch err.(type) { + case *ErrCorrupted: + return true + } + return false +} + +func (e *ErrCorrupted) Error() string { + if !e.Fd.Zero() { + return fmt.Sprintf("%v [file=%v]", e.Err, e.Fd) + } + return e.Err.Error() +} + +// Syncer is the interface that wraps basic Sync method. +type Syncer interface { + // Sync commits the current contents of the file to stable storage. + Sync() error +} + +// Reader is the interface that groups the basic Read, Seek, ReadAt and Close +// methods. +type Reader interface { + io.ReadSeeker + io.ReaderAt + io.Closer +} + +// Writer is the interface that groups the basic Write, Sync and Close +// methods. +type Writer interface { + io.WriteCloser + Syncer +} + +// Locker is the interface that wraps Unlock method. +type Locker interface { + Unlock() +} + +// FileDesc is a 'file descriptor'. 
+type FileDesc struct { + Type FileType + Num int64 +} + +func (fd FileDesc) String() string { + switch fd.Type { + case TypeManifest: + return fmt.Sprintf("MANIFEST-%06d", fd.Num) + case TypeJournal: + return fmt.Sprintf("%06d.log", fd.Num) + case TypeTable: + return fmt.Sprintf("%06d.ldb", fd.Num) + case TypeTemp: + return fmt.Sprintf("%06d.tmp", fd.Num) + default: + return fmt.Sprintf("%#x-%d", fd.Type, fd.Num) + } +} + +// Zero returns true if fd == (FileDesc{}). +func (fd FileDesc) Zero() bool { + return fd == (FileDesc{}) +} + +// FileDescOk returns true if fd is a valid 'file descriptor'. +func FileDescOk(fd FileDesc) bool { + switch fd.Type { + case TypeManifest: + case TypeJournal: + case TypeTable: + case TypeTemp: + default: + return false + } + return fd.Num >= 0 +} + +// Storage is the storage. A storage instance must be safe for concurrent use. +type Storage interface { + // Lock locks the storage. Any subsequent attempt to call Lock will fail + // until the last lock released. + // Caller should call Unlock method after use. + Lock() (Locker, error) + + // Log logs a string. This is used for logging. + // An implementation may write to a file, stdout or simply do nothing. + Log(str string) + + // SetMeta store 'file descriptor' that can later be acquired using GetMeta + // method. The 'file descriptor' should point to a valid file. + // SetMeta should be implemented in such way that changes should happen + // atomically. + SetMeta(fd FileDesc) error + + // GetMeta returns 'file descriptor' stored in meta. The 'file descriptor' + // can be updated using SetMeta method. + // Returns os.ErrNotExist if meta doesn't store any 'file descriptor', or + // 'file descriptor' point to nonexistent file. + GetMeta() (FileDesc, error) + + // List returns file descriptors that match the given file types. + // The file types may be OR'ed together. + List(ft FileType) ([]FileDesc, error) + + // Open opens file with the given 'file descriptor' read-only. 
+ // Returns os.ErrNotExist error if the file does not exist. + // Returns ErrClosed if the underlying storage is closed. + Open(fd FileDesc) (Reader, error) + + // Create creates file with the given 'file descriptor', truncate if already + // exist and opens write-only. + // Returns ErrClosed if the underlying storage is closed. + Create(fd FileDesc) (Writer, error) + + // Remove removes file with the given 'file descriptor'. + // Returns ErrClosed if the underlying storage is closed. + Remove(fd FileDesc) error + + // Rename renames file from oldfd to newfd. + // Returns ErrClosed if the underlying storage is closed. + Rename(oldfd, newfd FileDesc) error + + // Close closes the storage. + // It is valid to call Close multiple times. Other methods should not be + // called after the storage has been closed. + Close() error +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/table.go b/vendor/github.com/syndtr/goleveldb/leveldb/table.go new file mode 100644 index 0000000000..1fac60d050 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/table.go @@ -0,0 +1,531 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "fmt" + "sort" + "sync/atomic" + + "github.com/syndtr/goleveldb/leveldb/cache" + "github.com/syndtr/goleveldb/leveldb/iterator" + "github.com/syndtr/goleveldb/leveldb/opt" + "github.com/syndtr/goleveldb/leveldb/storage" + "github.com/syndtr/goleveldb/leveldb/table" + "github.com/syndtr/goleveldb/leveldb/util" +) + +// tFile holds basic information about a table. +type tFile struct { + fd storage.FileDesc + seekLeft int32 + size int64 + imin, imax internalKey +} + +// Returns true if given key is after largest key of this table. 
+func (t *tFile) after(icmp *iComparer, ukey []byte) bool { + return ukey != nil && icmp.uCompare(ukey, t.imax.ukey()) > 0 +} + +// Returns true if given key is before smallest key of this table. +func (t *tFile) before(icmp *iComparer, ukey []byte) bool { + return ukey != nil && icmp.uCompare(ukey, t.imin.ukey()) < 0 +} + +// Returns true if given key range overlaps with this table key range. +func (t *tFile) overlaps(icmp *iComparer, umin, umax []byte) bool { + return !t.after(icmp, umin) && !t.before(icmp, umax) +} + +// Cosumes one seek and return current seeks left. +func (t *tFile) consumeSeek() int32 { + return atomic.AddInt32(&t.seekLeft, -1) +} + +// Creates new tFile. +func newTableFile(fd storage.FileDesc, size int64, imin, imax internalKey) *tFile { + f := &tFile{ + fd: fd, + size: size, + imin: imin, + imax: imax, + } + + // We arrange to automatically compact this file after + // a certain number of seeks. Let's assume: + // (1) One seek costs 10ms + // (2) Writing or reading 1MB costs 10ms (100MB/s) + // (3) A compaction of 1MB does 25MB of IO: + // 1MB read from this level + // 10-12MB read from next level (boundaries may be misaligned) + // 10-12MB written to next level + // This implies that 25 seeks cost the same as the compaction + // of 1MB of data. I.e., one seek costs approximately the + // same as the compaction of 40KB of data. We are a little + // conservative and allow approximately one seek for every 16KB + // of data before triggering a compaction. + f.seekLeft = int32(size / 16384) + if f.seekLeft < 100 { + f.seekLeft = 100 + } + + return f +} + +func tableFileFromRecord(r atRecord) *tFile { + return newTableFile(storage.FileDesc{Type: storage.TypeTable, Num: r.num}, r.size, r.imin, r.imax) +} + +// tFiles hold multiple tFile. 
+type tFiles []*tFile + +func (tf tFiles) Len() int { return len(tf) } +func (tf tFiles) Swap(i, j int) { tf[i], tf[j] = tf[j], tf[i] } + +func (tf tFiles) nums() string { + x := "[ " + for i, f := range tf { + if i != 0 { + x += ", " + } + x += fmt.Sprint(f.fd.Num) + } + x += " ]" + return x +} + +// Returns true if i smallest key is less than j. +// This used for sort by key in ascending order. +func (tf tFiles) lessByKey(icmp *iComparer, i, j int) bool { + a, b := tf[i], tf[j] + n := icmp.Compare(a.imin, b.imin) + if n == 0 { + return a.fd.Num < b.fd.Num + } + return n < 0 +} + +// Returns true if i file number is greater than j. +// This used for sort by file number in descending order. +func (tf tFiles) lessByNum(i, j int) bool { + return tf[i].fd.Num > tf[j].fd.Num +} + +// Sorts tables by key in ascending order. +func (tf tFiles) sortByKey(icmp *iComparer) { + sort.Sort(&tFilesSortByKey{tFiles: tf, icmp: icmp}) +} + +// Sorts tables by file number in descending order. +func (tf tFiles) sortByNum() { + sort.Sort(&tFilesSortByNum{tFiles: tf}) +} + +// Returns sum of all tables size. +func (tf tFiles) size() (sum int64) { + for _, t := range tf { + sum += t.size + } + return sum +} + +// Searches smallest index of tables whose its smallest +// key is after or equal with given key. +func (tf tFiles) searchMin(icmp *iComparer, ikey internalKey) int { + return sort.Search(len(tf), func(i int) bool { + return icmp.Compare(tf[i].imin, ikey) >= 0 + }) +} + +// Searches smallest index of tables whose its largest +// key is after or equal with given key. +func (tf tFiles) searchMax(icmp *iComparer, ikey internalKey) int { + return sort.Search(len(tf), func(i int) bool { + return icmp.Compare(tf[i].imax, ikey) >= 0 + }) +} + +// Returns true if given key range overlaps with one or more +// tables key range. If unsorted is true then binary search will not be used. 
+func (tf tFiles) overlaps(icmp *iComparer, umin, umax []byte, unsorted bool) bool { + if unsorted { + // Check against all files. + for _, t := range tf { + if t.overlaps(icmp, umin, umax) { + return true + } + } + return false + } + + i := 0 + if len(umin) > 0 { + // Find the earliest possible internal key for min. + i = tf.searchMax(icmp, makeInternalKey(nil, umin, keyMaxSeq, keyTypeSeek)) + } + if i >= len(tf) { + // Beginning of range is after all files, so no overlap. + return false + } + return !tf[i].before(icmp, umax) +} + +// Returns tables whose its key range overlaps with given key range. +// Range will be expanded if ukey found hop across tables. +// If overlapped is true then the search will be restarted if umax +// expanded. +// The dst content will be overwritten. +func (tf tFiles) getOverlaps(dst tFiles, icmp *iComparer, umin, umax []byte, overlapped bool) tFiles { + dst = dst[:0] + for i := 0; i < len(tf); { + t := tf[i] + if t.overlaps(icmp, umin, umax) { + if umin != nil && icmp.uCompare(t.imin.ukey(), umin) < 0 { + umin = t.imin.ukey() + dst = dst[:0] + i = 0 + continue + } else if umax != nil && icmp.uCompare(t.imax.ukey(), umax) > 0 { + umax = t.imax.ukey() + // Restart search if it is overlapped. + if overlapped { + dst = dst[:0] + i = 0 + continue + } + } + + dst = append(dst, t) + } + i++ + } + + return dst +} + +// Returns tables key range. +func (tf tFiles) getRange(icmp *iComparer) (imin, imax internalKey) { + for i, t := range tf { + if i == 0 { + imin, imax = t.imin, t.imax + continue + } + if icmp.Compare(t.imin, imin) < 0 { + imin = t.imin + } + if icmp.Compare(t.imax, imax) > 0 { + imax = t.imax + } + } + + return +} + +// Creates iterator index from tables. 
+func (tf tFiles) newIndexIterator(tops *tOps, icmp *iComparer, slice *util.Range, ro *opt.ReadOptions) iterator.IteratorIndexer { + if slice != nil { + var start, limit int + if slice.Start != nil { + start = tf.searchMax(icmp, internalKey(slice.Start)) + } + if slice.Limit != nil { + limit = tf.searchMin(icmp, internalKey(slice.Limit)) + } else { + limit = tf.Len() + } + tf = tf[start:limit] + } + return iterator.NewArrayIndexer(&tFilesArrayIndexer{ + tFiles: tf, + tops: tops, + icmp: icmp, + slice: slice, + ro: ro, + }) +} + +// Tables iterator index. +type tFilesArrayIndexer struct { + tFiles + tops *tOps + icmp *iComparer + slice *util.Range + ro *opt.ReadOptions +} + +func (a *tFilesArrayIndexer) Search(key []byte) int { + return a.searchMax(a.icmp, internalKey(key)) +} + +func (a *tFilesArrayIndexer) Get(i int) iterator.Iterator { + if i == 0 || i == a.Len()-1 { + return a.tops.newIterator(a.tFiles[i], a.slice, a.ro) + } + return a.tops.newIterator(a.tFiles[i], nil, a.ro) +} + +// Helper type for sortByKey. +type tFilesSortByKey struct { + tFiles + icmp *iComparer +} + +func (x *tFilesSortByKey) Less(i, j int) bool { + return x.lessByKey(x.icmp, i, j) +} + +// Helper type for sortByNum. +type tFilesSortByNum struct { + tFiles +} + +func (x *tFilesSortByNum) Less(i, j int) bool { + return x.lessByNum(i, j) +} + +// Table operations. +type tOps struct { + s *session + noSync bool + evictRemoved bool + cache *cache.Cache + bcache *cache.Cache + bpool *util.BufferPool +} + +// Creates an empty table and returns table writer. +func (t *tOps) create() (*tWriter, error) { + fd := storage.FileDesc{Type: storage.TypeTable, Num: t.s.allocFileNum()} + fw, err := t.s.stor.Create(fd) + if err != nil { + return nil, err + } + return &tWriter{ + t: t, + fd: fd, + w: fw, + tw: table.NewWriter(fw, t.s.o.Options), + }, nil +} + +// Builds table from src iterator. 
+func (t *tOps) createFrom(src iterator.Iterator) (f *tFile, n int, err error) { + w, err := t.create() + if err != nil { + return + } + + defer func() { + if err != nil { + w.drop() + } + }() + + for src.Next() { + err = w.append(src.Key(), src.Value()) + if err != nil { + return + } + } + err = src.Error() + if err != nil { + return + } + + n = w.tw.EntriesLen() + f, err = w.finish() + return +} + +// Opens table. It returns a cache handle, which should +// be released after use. +func (t *tOps) open(f *tFile) (ch *cache.Handle, err error) { + ch = t.cache.Get(0, uint64(f.fd.Num), func() (size int, value cache.Value) { + var r storage.Reader + r, err = t.s.stor.Open(f.fd) + if err != nil { + return 0, nil + } + + var bcache *cache.NamespaceGetter + if t.bcache != nil { + bcache = &cache.NamespaceGetter{Cache: t.bcache, NS: uint64(f.fd.Num)} + } + + var tr *table.Reader + tr, err = table.NewReader(r, f.size, f.fd, bcache, t.bpool, t.s.o.Options) + if err != nil { + r.Close() + return 0, nil + } + return 1, tr + + }) + if ch == nil && err == nil { + err = ErrClosed + } + return +} + +// Finds key/value pair whose key is greater than or equal to the +// given key. +func (t *tOps) find(f *tFile, key []byte, ro *opt.ReadOptions) (rkey, rvalue []byte, err error) { + ch, err := t.open(f) + if err != nil { + return nil, nil, err + } + defer ch.Release() + return ch.Value().(*table.Reader).Find(key, true, ro) +} + +// Finds key that is greater than or equal to the given key. +func (t *tOps) findKey(f *tFile, key []byte, ro *opt.ReadOptions) (rkey []byte, err error) { + ch, err := t.open(f) + if err != nil { + return nil, err + } + defer ch.Release() + return ch.Value().(*table.Reader).FindKey(key, true, ro) +} + +// Returns approximate offset of the given key. 
+func (t *tOps) offsetOf(f *tFile, key []byte) (offset int64, err error) { + ch, err := t.open(f) + if err != nil { + return + } + defer ch.Release() + return ch.Value().(*table.Reader).OffsetOf(key) +} + +// Creates an iterator from the given table. +func (t *tOps) newIterator(f *tFile, slice *util.Range, ro *opt.ReadOptions) iterator.Iterator { + ch, err := t.open(f) + if err != nil { + return iterator.NewEmptyIterator(err) + } + iter := ch.Value().(*table.Reader).NewIterator(slice, ro) + iter.SetReleaser(ch) + return iter +} + +// Removes table from persistent storage. It waits until +// no one use the the table. +func (t *tOps) remove(f *tFile) { + t.cache.Delete(0, uint64(f.fd.Num), func() { + if err := t.s.stor.Remove(f.fd); err != nil { + t.s.logf("table@remove removing @%d %q", f.fd.Num, err) + } else { + t.s.logf("table@remove removed @%d", f.fd.Num) + } + if t.evictRemoved && t.bcache != nil { + t.bcache.EvictNS(uint64(f.fd.Num)) + } + }) +} + +// Closes the table ops instance. It will close all tables, +// regadless still used or not. +func (t *tOps) close() { + t.bpool.Close() + t.cache.Close() + if t.bcache != nil { + t.bcache.CloseWeak() + } +} + +// Creates new initialized table ops instance. +func newTableOps(s *session) *tOps { + var ( + cacher cache.Cacher + bcache *cache.Cache + bpool *util.BufferPool + ) + if s.o.GetOpenFilesCacheCapacity() > 0 { + cacher = cache.NewLRU(s.o.GetOpenFilesCacheCapacity()) + } + if !s.o.GetDisableBlockCache() { + var bcacher cache.Cacher + if s.o.GetBlockCacheCapacity() > 0 { + bcacher = s.o.GetBlockCacher().New(s.o.GetBlockCacheCapacity()) + } + bcache = cache.NewCache(bcacher) + } + if !s.o.GetDisableBufferPool() { + bpool = util.NewBufferPool(s.o.GetBlockSize() + 5) + } + return &tOps{ + s: s, + noSync: s.o.GetNoSync(), + evictRemoved: s.o.GetBlockCacheEvictRemoved(), + cache: cache.NewCache(cacher), + bcache: bcache, + bpool: bpool, + } +} + +// tWriter wraps the table writer. 
It keep track of file descriptor +// and added key range. +type tWriter struct { + t *tOps + + fd storage.FileDesc + w storage.Writer + tw *table.Writer + + first, last []byte +} + +// Append key/value pair to the table. +func (w *tWriter) append(key, value []byte) error { + if w.first == nil { + w.first = append([]byte{}, key...) + } + w.last = append(w.last[:0], key...) + return w.tw.Append(key, value) +} + +// Returns true if the table is empty. +func (w *tWriter) empty() bool { + return w.first == nil +} + +// Closes the storage.Writer. +func (w *tWriter) close() { + if w.w != nil { + w.w.Close() + w.w = nil + } +} + +// Finalizes the table and returns table file. +func (w *tWriter) finish() (f *tFile, err error) { + defer w.close() + err = w.tw.Close() + if err != nil { + return + } + if !w.t.noSync { + err = w.w.Sync() + if err != nil { + return + } + } + f = newTableFile(w.fd, int64(w.tw.BytesLen()), internalKey(w.first), internalKey(w.last)) + return +} + +// Drops the table. +func (w *tWriter) drop() { + w.close() + w.t.s.stor.Remove(w.fd) + w.t.s.reuseFileNum(w.fd.Num) + w.tw = nil + w.first = nil + w.last = nil +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/table/reader.go b/vendor/github.com/syndtr/goleveldb/leveldb/table/reader.go new file mode 100644 index 0000000000..16cfbaa006 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/table/reader.go @@ -0,0 +1,1135 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +package table + +import ( + "encoding/binary" + "fmt" + "io" + "sort" + "strings" + "sync" + + "github.com/golang/snappy" + + "github.com/syndtr/goleveldb/leveldb/cache" + "github.com/syndtr/goleveldb/leveldb/comparer" + "github.com/syndtr/goleveldb/leveldb/errors" + "github.com/syndtr/goleveldb/leveldb/filter" + "github.com/syndtr/goleveldb/leveldb/iterator" + "github.com/syndtr/goleveldb/leveldb/opt" + "github.com/syndtr/goleveldb/leveldb/storage" + "github.com/syndtr/goleveldb/leveldb/util" +) + +// Reader errors. +var ( + ErrNotFound = errors.ErrNotFound + ErrReaderReleased = errors.New("leveldb/table: reader released") + ErrIterReleased = errors.New("leveldb/table: iterator released") +) + +// ErrCorrupted describes error due to corruption. This error will be wrapped +// with errors.ErrCorrupted. +type ErrCorrupted struct { + Pos int64 + Size int64 + Kind string + Reason string +} + +func (e *ErrCorrupted) Error() string { + return fmt.Sprintf("leveldb/table: corruption on %s (pos=%d): %s", e.Kind, e.Pos, e.Reason) +} + +func max(x, y int) int { + if x > y { + return x + } + return y +} + +type block struct { + bpool *util.BufferPool + bh blockHandle + data []byte + restartsLen int + restartsOffset int +} + +func (b *block) seek(cmp comparer.Comparer, rstart, rlimit int, key []byte) (index, offset int, err error) { + index = sort.Search(b.restartsLen-rstart-(b.restartsLen-rlimit), func(i int) bool { + offset := int(binary.LittleEndian.Uint32(b.data[b.restartsOffset+4*(rstart+i):])) + offset++ // shared always zero, since this is a restart point + v1, n1 := binary.Uvarint(b.data[offset:]) // key length + _, n2 := binary.Uvarint(b.data[offset+n1:]) // value length + m := offset + n1 + n2 + return cmp.Compare(b.data[m:m+int(v1)], key) > 0 + }) + rstart - 1 + if index < rstart { + // The smallest key is greater-than key sought. 
+ index = rstart + } + offset = int(binary.LittleEndian.Uint32(b.data[b.restartsOffset+4*index:])) + return +} + +func (b *block) restartIndex(rstart, rlimit, offset int) int { + return sort.Search(b.restartsLen-rstart-(b.restartsLen-rlimit), func(i int) bool { + return int(binary.LittleEndian.Uint32(b.data[b.restartsOffset+4*(rstart+i):])) > offset + }) + rstart - 1 +} + +func (b *block) restartOffset(index int) int { + return int(binary.LittleEndian.Uint32(b.data[b.restartsOffset+4*index:])) +} + +func (b *block) entry(offset int) (key, value []byte, nShared, n int, err error) { + if offset >= b.restartsOffset { + if offset != b.restartsOffset { + err = &ErrCorrupted{Reason: "entries offset not aligned"} + } + return + } + v0, n0 := binary.Uvarint(b.data[offset:]) // Shared prefix length + v1, n1 := binary.Uvarint(b.data[offset+n0:]) // Key length + v2, n2 := binary.Uvarint(b.data[offset+n0+n1:]) // Value length + m := n0 + n1 + n2 + n = m + int(v1) + int(v2) + if n0 <= 0 || n1 <= 0 || n2 <= 0 || offset+n > b.restartsOffset { + err = &ErrCorrupted{Reason: "entries corrupted"} + return + } + key = b.data[offset+m : offset+m+int(v1)] + value = b.data[offset+m+int(v1) : offset+n] + nShared = int(v0) + return +} + +func (b *block) Release() { + b.bpool.Put(b.data) + b.bpool = nil + b.data = nil +} + +type dir int + +const ( + dirReleased dir = iota - 1 + dirSOI + dirEOI + dirBackward + dirForward +) + +type blockIter struct { + tr *Reader + block *block + blockReleaser util.Releaser + releaser util.Releaser + key, value []byte + offset int + // Previous offset, only filled by Next. + prevOffset int + prevNode []int + prevKeys []byte + restartIndex int + // Iterator direction. + dir dir + // Restart index slice range. + riStart int + riLimit int + // Offset slice range. + offsetStart int + offsetRealStart int + offsetLimit int + // Error. 
+ err error +} + +func (i *blockIter) sErr(err error) { + i.err = err + i.key = nil + i.value = nil + i.prevNode = nil + i.prevKeys = nil +} + +func (i *blockIter) reset() { + if i.dir == dirBackward { + i.prevNode = i.prevNode[:0] + i.prevKeys = i.prevKeys[:0] + } + i.restartIndex = i.riStart + i.offset = i.offsetStart + i.dir = dirSOI + i.key = i.key[:0] + i.value = nil +} + +func (i *blockIter) isFirst() bool { + switch i.dir { + case dirForward: + return i.prevOffset == i.offsetRealStart + case dirBackward: + return len(i.prevNode) == 1 && i.restartIndex == i.riStart + } + return false +} + +func (i *blockIter) isLast() bool { + switch i.dir { + case dirForward, dirBackward: + return i.offset == i.offsetLimit + } + return false +} + +func (i *blockIter) First() bool { + if i.err != nil { + return false + } else if i.dir == dirReleased { + i.err = ErrIterReleased + return false + } + + if i.dir == dirBackward { + i.prevNode = i.prevNode[:0] + i.prevKeys = i.prevKeys[:0] + } + i.dir = dirSOI + return i.Next() +} + +func (i *blockIter) Last() bool { + if i.err != nil { + return false + } else if i.dir == dirReleased { + i.err = ErrIterReleased + return false + } + + if i.dir == dirBackward { + i.prevNode = i.prevNode[:0] + i.prevKeys = i.prevKeys[:0] + } + i.dir = dirEOI + return i.Prev() +} + +func (i *blockIter) Seek(key []byte) bool { + if i.err != nil { + return false + } else if i.dir == dirReleased { + i.err = ErrIterReleased + return false + } + + ri, offset, err := i.block.seek(i.tr.cmp, i.riStart, i.riLimit, key) + if err != nil { + i.sErr(err) + return false + } + i.restartIndex = ri + i.offset = max(i.offsetStart, offset) + if i.dir == dirSOI || i.dir == dirEOI { + i.dir = dirForward + } + for i.Next() { + if i.tr.cmp.Compare(i.key, key) >= 0 { + return true + } + } + return false +} + +func (i *blockIter) Next() bool { + if i.dir == dirEOI || i.err != nil { + return false + } else if i.dir == dirReleased { + i.err = ErrIterReleased + return false + } + 
+ if i.dir == dirSOI { + i.restartIndex = i.riStart + i.offset = i.offsetStart + } else if i.dir == dirBackward { + i.prevNode = i.prevNode[:0] + i.prevKeys = i.prevKeys[:0] + } + for i.offset < i.offsetRealStart { + key, value, nShared, n, err := i.block.entry(i.offset) + if err != nil { + i.sErr(i.tr.fixErrCorruptedBH(i.block.bh, err)) + return false + } + if n == 0 { + i.dir = dirEOI + return false + } + i.key = append(i.key[:nShared], key...) + i.value = value + i.offset += n + } + if i.offset >= i.offsetLimit { + i.dir = dirEOI + if i.offset != i.offsetLimit { + i.sErr(i.tr.newErrCorruptedBH(i.block.bh, "entries offset not aligned")) + } + return false + } + key, value, nShared, n, err := i.block.entry(i.offset) + if err != nil { + i.sErr(i.tr.fixErrCorruptedBH(i.block.bh, err)) + return false + } + if n == 0 { + i.dir = dirEOI + return false + } + i.key = append(i.key[:nShared], key...) + i.value = value + i.prevOffset = i.offset + i.offset += n + i.dir = dirForward + return true +} + +func (i *blockIter) Prev() bool { + if i.dir == dirSOI || i.err != nil { + return false + } else if i.dir == dirReleased { + i.err = ErrIterReleased + return false + } + + var ri int + if i.dir == dirForward { + // Change direction. + i.offset = i.prevOffset + if i.offset == i.offsetRealStart { + i.dir = dirSOI + return false + } + ri = i.block.restartIndex(i.restartIndex, i.riLimit, i.offset) + i.dir = dirBackward + } else if i.dir == dirEOI { + // At the end of iterator. + i.restartIndex = i.riLimit + i.offset = i.offsetLimit + if i.offset == i.offsetRealStart { + i.dir = dirSOI + return false + } + ri = i.riLimit - 1 + i.dir = dirBackward + } else if len(i.prevNode) == 1 { + // This is the end of a restart range. + i.offset = i.prevNode[0] + i.prevNode = i.prevNode[:0] + if i.restartIndex == i.riStart { + i.dir = dirSOI + return false + } + i.restartIndex-- + ri = i.restartIndex + } else { + // In the middle of restart range, get from cache. 
+ n := len(i.prevNode) - 3 + node := i.prevNode[n:] + i.prevNode = i.prevNode[:n] + // Get the key. + ko := node[0] + i.key = append(i.key[:0], i.prevKeys[ko:]...) + i.prevKeys = i.prevKeys[:ko] + // Get the value. + vo := node[1] + vl := vo + node[2] + i.value = i.block.data[vo:vl] + i.offset = vl + return true + } + // Build entries cache. + i.key = i.key[:0] + i.value = nil + offset := i.block.restartOffset(ri) + if offset == i.offset { + ri-- + if ri < 0 { + i.dir = dirSOI + return false + } + offset = i.block.restartOffset(ri) + } + i.prevNode = append(i.prevNode, offset) + for { + key, value, nShared, n, err := i.block.entry(offset) + if err != nil { + i.sErr(i.tr.fixErrCorruptedBH(i.block.bh, err)) + return false + } + if offset >= i.offsetRealStart { + if i.value != nil { + // Appends 3 variables: + // 1. Previous keys offset + // 2. Value offset in the data block + // 3. Value length + i.prevNode = append(i.prevNode, len(i.prevKeys), offset-len(i.value), len(i.value)) + i.prevKeys = append(i.prevKeys, i.key...) + } + i.value = value + } + i.key = append(i.key[:nShared], key...) + offset += n + // Stop if target offset reached. 
+ if offset >= i.offset { + if offset != i.offset { + i.sErr(i.tr.newErrCorruptedBH(i.block.bh, "entries offset not aligned")) + return false + } + + break + } + } + i.restartIndex = ri + i.offset = offset + return true +} + +func (i *blockIter) Key() []byte { + if i.err != nil || i.dir <= dirEOI { + return nil + } + return i.key +} + +func (i *blockIter) Value() []byte { + if i.err != nil || i.dir <= dirEOI { + return nil + } + return i.value +} + +func (i *blockIter) Release() { + if i.dir != dirReleased { + i.tr = nil + i.block = nil + i.prevNode = nil + i.prevKeys = nil + i.key = nil + i.value = nil + i.dir = dirReleased + if i.blockReleaser != nil { + i.blockReleaser.Release() + i.blockReleaser = nil + } + if i.releaser != nil { + i.releaser.Release() + i.releaser = nil + } + } +} + +func (i *blockIter) SetReleaser(releaser util.Releaser) { + if i.dir == dirReleased { + panic(util.ErrReleased) + } + if i.releaser != nil && releaser != nil { + panic(util.ErrHasReleaser) + } + i.releaser = releaser +} + +func (i *blockIter) Valid() bool { + return i.err == nil && (i.dir == dirBackward || i.dir == dirForward) +} + +func (i *blockIter) Error() error { + return i.err +} + +type filterBlock struct { + bpool *util.BufferPool + data []byte + oOffset int + baseLg uint + filtersNum int +} + +func (b *filterBlock) contains(filter filter.Filter, offset uint64, key []byte) bool { + i := int(offset >> b.baseLg) + if i < b.filtersNum { + o := b.data[b.oOffset+i*4:] + n := int(binary.LittleEndian.Uint32(o)) + m := int(binary.LittleEndian.Uint32(o[4:])) + if n < m && m <= b.oOffset { + return filter.Contains(b.data[n:m], key) + } else if n == m { + return false + } + } + return true +} + +func (b *filterBlock) Release() { + b.bpool.Put(b.data) + b.bpool = nil + b.data = nil +} + +type indexIter struct { + *blockIter + tr *Reader + slice *util.Range + // Options + fillCache bool +} + +func (i *indexIter) Get() iterator.Iterator { + value := i.Value() + if value == nil { + 
return nil + } + dataBH, n := decodeBlockHandle(value) + if n == 0 { + return iterator.NewEmptyIterator(i.tr.newErrCorruptedBH(i.tr.indexBH, "bad data block handle")) + } + + var slice *util.Range + if i.slice != nil && (i.blockIter.isFirst() || i.blockIter.isLast()) { + slice = i.slice + } + return i.tr.getDataIterErr(dataBH, slice, i.tr.verifyChecksum, i.fillCache) +} + +// Reader is a table reader. +type Reader struct { + mu sync.RWMutex + fd storage.FileDesc + reader io.ReaderAt + cache *cache.NamespaceGetter + err error + bpool *util.BufferPool + // Options + o *opt.Options + cmp comparer.Comparer + filter filter.Filter + verifyChecksum bool + + dataEnd int64 + metaBH, indexBH, filterBH blockHandle + indexBlock *block + filterBlock *filterBlock +} + +func (r *Reader) blockKind(bh blockHandle) string { + switch bh.offset { + case r.metaBH.offset: + return "meta-block" + case r.indexBH.offset: + return "index-block" + case r.filterBH.offset: + if r.filterBH.length > 0 { + return "filter-block" + } + } + return "data-block" +} + +func (r *Reader) newErrCorrupted(pos, size int64, kind, reason string) error { + return &errors.ErrCorrupted{Fd: r.fd, Err: &ErrCorrupted{Pos: pos, Size: size, Kind: kind, Reason: reason}} +} + +func (r *Reader) newErrCorruptedBH(bh blockHandle, reason string) error { + return r.newErrCorrupted(int64(bh.offset), int64(bh.length), r.blockKind(bh), reason) +} + +func (r *Reader) fixErrCorruptedBH(bh blockHandle, err error) error { + if cerr, ok := err.(*ErrCorrupted); ok { + cerr.Pos = int64(bh.offset) + cerr.Size = int64(bh.length) + cerr.Kind = r.blockKind(bh) + return &errors.ErrCorrupted{Fd: r.fd, Err: cerr} + } + return err +} + +func (r *Reader) readRawBlock(bh blockHandle, verifyChecksum bool) ([]byte, error) { + data := r.bpool.Get(int(bh.length + blockTrailerLen)) + if _, err := r.reader.ReadAt(data, int64(bh.offset)); err != nil && err != io.EOF { + return nil, err + } + + if verifyChecksum { + n := bh.length + 1 + checksum0 := 
binary.LittleEndian.Uint32(data[n:]) + checksum1 := util.NewCRC(data[:n]).Value() + if checksum0 != checksum1 { + r.bpool.Put(data) + return nil, r.newErrCorruptedBH(bh, fmt.Sprintf("checksum mismatch, want=%#x got=%#x", checksum0, checksum1)) + } + } + + switch data[bh.length] { + case blockTypeNoCompression: + data = data[:bh.length] + case blockTypeSnappyCompression: + decLen, err := snappy.DecodedLen(data[:bh.length]) + if err != nil { + r.bpool.Put(data) + return nil, r.newErrCorruptedBH(bh, err.Error()) + } + decData := r.bpool.Get(decLen) + decData, err = snappy.Decode(decData, data[:bh.length]) + r.bpool.Put(data) + if err != nil { + r.bpool.Put(decData) + return nil, r.newErrCorruptedBH(bh, err.Error()) + } + data = decData + default: + r.bpool.Put(data) + return nil, r.newErrCorruptedBH(bh, fmt.Sprintf("unknown compression type %#x", data[bh.length])) + } + return data, nil +} + +func (r *Reader) readBlock(bh blockHandle, verifyChecksum bool) (*block, error) { + data, err := r.readRawBlock(bh, verifyChecksum) + if err != nil { + return nil, err + } + restartsLen := int(binary.LittleEndian.Uint32(data[len(data)-4:])) + b := &block{ + bpool: r.bpool, + bh: bh, + data: data, + restartsLen: restartsLen, + restartsOffset: len(data) - (restartsLen+1)*4, + } + return b, nil +} + +func (r *Reader) readBlockCached(bh blockHandle, verifyChecksum, fillCache bool) (*block, util.Releaser, error) { + if r.cache != nil { + var ( + err error + ch *cache.Handle + ) + if fillCache { + ch = r.cache.Get(bh.offset, func() (size int, value cache.Value) { + var b *block + b, err = r.readBlock(bh, verifyChecksum) + if err != nil { + return 0, nil + } + return cap(b.data), b + }) + } else { + ch = r.cache.Get(bh.offset, nil) + } + if ch != nil { + b, ok := ch.Value().(*block) + if !ok { + ch.Release() + return nil, nil, errors.New("leveldb/table: inconsistent block type") + } + return b, ch, err + } else if err != nil { + return nil, nil, err + } + } + + b, err := r.readBlock(bh, 
verifyChecksum) + return b, b, err +} + +func (r *Reader) readFilterBlock(bh blockHandle) (*filterBlock, error) { + data, err := r.readRawBlock(bh, true) + if err != nil { + return nil, err + } + n := len(data) + if n < 5 { + return nil, r.newErrCorruptedBH(bh, "too short") + } + m := n - 5 + oOffset := int(binary.LittleEndian.Uint32(data[m:])) + if oOffset > m { + return nil, r.newErrCorruptedBH(bh, "invalid data-offsets offset") + } + b := &filterBlock{ + bpool: r.bpool, + data: data, + oOffset: oOffset, + baseLg: uint(data[n-1]), + filtersNum: (m - oOffset) / 4, + } + return b, nil +} + +func (r *Reader) readFilterBlockCached(bh blockHandle, fillCache bool) (*filterBlock, util.Releaser, error) { + if r.cache != nil { + var ( + err error + ch *cache.Handle + ) + if fillCache { + ch = r.cache.Get(bh.offset, func() (size int, value cache.Value) { + var b *filterBlock + b, err = r.readFilterBlock(bh) + if err != nil { + return 0, nil + } + return cap(b.data), b + }) + } else { + ch = r.cache.Get(bh.offset, nil) + } + if ch != nil { + b, ok := ch.Value().(*filterBlock) + if !ok { + ch.Release() + return nil, nil, errors.New("leveldb/table: inconsistent block type") + } + return b, ch, err + } else if err != nil { + return nil, nil, err + } + } + + b, err := r.readFilterBlock(bh) + return b, b, err +} + +func (r *Reader) getIndexBlock(fillCache bool) (b *block, rel util.Releaser, err error) { + if r.indexBlock == nil { + return r.readBlockCached(r.indexBH, true, fillCache) + } + return r.indexBlock, util.NoopReleaser{}, nil +} + +func (r *Reader) getFilterBlock(fillCache bool) (*filterBlock, util.Releaser, error) { + if r.filterBlock == nil { + return r.readFilterBlockCached(r.filterBH, fillCache) + } + return r.filterBlock, util.NoopReleaser{}, nil +} + +func (r *Reader) newBlockIter(b *block, bReleaser util.Releaser, slice *util.Range, inclLimit bool) *blockIter { + bi := &blockIter{ + tr: r, + block: b, + blockReleaser: bReleaser, + // Valid key should never be 
nil. + key: make([]byte, 0), + dir: dirSOI, + riStart: 0, + riLimit: b.restartsLen, + offsetStart: 0, + offsetRealStart: 0, + offsetLimit: b.restartsOffset, + } + if slice != nil { + if slice.Start != nil { + if bi.Seek(slice.Start) { + bi.riStart = b.restartIndex(bi.restartIndex, b.restartsLen, bi.prevOffset) + bi.offsetStart = b.restartOffset(bi.riStart) + bi.offsetRealStart = bi.prevOffset + } else { + bi.riStart = b.restartsLen + bi.offsetStart = b.restartsOffset + bi.offsetRealStart = b.restartsOffset + } + } + if slice.Limit != nil { + if bi.Seek(slice.Limit) && (!inclLimit || bi.Next()) { + bi.offsetLimit = bi.prevOffset + bi.riLimit = bi.restartIndex + 1 + } + } + bi.reset() + if bi.offsetStart > bi.offsetLimit { + bi.sErr(errors.New("leveldb/table: invalid slice range")) + } + } + return bi +} + +func (r *Reader) getDataIter(dataBH blockHandle, slice *util.Range, verifyChecksum, fillCache bool) iterator.Iterator { + b, rel, err := r.readBlockCached(dataBH, verifyChecksum, fillCache) + if err != nil { + return iterator.NewEmptyIterator(err) + } + return r.newBlockIter(b, rel, slice, false) +} + +func (r *Reader) getDataIterErr(dataBH blockHandle, slice *util.Range, verifyChecksum, fillCache bool) iterator.Iterator { + r.mu.RLock() + defer r.mu.RUnlock() + + if r.err != nil { + return iterator.NewEmptyIterator(r.err) + } + + return r.getDataIter(dataBH, slice, verifyChecksum, fillCache) +} + +// NewIterator creates an iterator from the table. +// +// Slice allows slicing the iterator to only contains keys in the given +// range. A nil Range.Start is treated as a key before all keys in the +// table. And a nil Range.Limit is treated as a key after all keys in +// the table. +// +// The returned iterator is not safe for concurrent use and should be released +// after use. +// +// Also read Iterator documentation of the leveldb/iterator package. 
+func (r *Reader) NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator { + r.mu.RLock() + defer r.mu.RUnlock() + + if r.err != nil { + return iterator.NewEmptyIterator(r.err) + } + + fillCache := !ro.GetDontFillCache() + indexBlock, rel, err := r.getIndexBlock(fillCache) + if err != nil { + return iterator.NewEmptyIterator(err) + } + index := &indexIter{ + blockIter: r.newBlockIter(indexBlock, rel, slice, true), + tr: r, + slice: slice, + fillCache: !ro.GetDontFillCache(), + } + return iterator.NewIndexedIterator(index, opt.GetStrict(r.o, ro, opt.StrictReader)) +} + +func (r *Reader) find(key []byte, filtered bool, ro *opt.ReadOptions, noValue bool) (rkey, value []byte, err error) { + r.mu.RLock() + defer r.mu.RUnlock() + + if r.err != nil { + err = r.err + return + } + + indexBlock, rel, err := r.getIndexBlock(true) + if err != nil { + return + } + defer rel.Release() + + index := r.newBlockIter(indexBlock, nil, nil, true) + defer index.Release() + + if !index.Seek(key) { + if err = index.Error(); err == nil { + err = ErrNotFound + } + return + } + + dataBH, n := decodeBlockHandle(index.Value()) + if n == 0 { + r.err = r.newErrCorruptedBH(r.indexBH, "bad data block handle") + return nil, nil, r.err + } + + // The filter should only used for exact match. + if filtered && r.filter != nil { + filterBlock, frel, ferr := r.getFilterBlock(true) + if ferr == nil { + if !filterBlock.contains(r.filter, dataBH.offset, key) { + frel.Release() + return nil, nil, ErrNotFound + } + frel.Release() + } else if !errors.IsCorrupted(ferr) { + return nil, nil, ferr + } + } + + data := r.getDataIter(dataBH, nil, r.verifyChecksum, !ro.GetDontFillCache()) + if !data.Seek(key) { + data.Release() + if err = data.Error(); err != nil { + return + } + + // The nearest greater-than key is the first key of the next block. 
+ if !index.Next() { + if err = index.Error(); err == nil { + err = ErrNotFound + } + return + } + + dataBH, n = decodeBlockHandle(index.Value()) + if n == 0 { + r.err = r.newErrCorruptedBH(r.indexBH, "bad data block handle") + return nil, nil, r.err + } + + data = r.getDataIter(dataBH, nil, r.verifyChecksum, !ro.GetDontFillCache()) + if !data.Next() { + data.Release() + if err = data.Error(); err == nil { + err = ErrNotFound + } + return + } + } + + // Key doesn't use block buffer, no need to copy the buffer. + rkey = data.Key() + if !noValue { + if r.bpool == nil { + value = data.Value() + } else { + // Value does use block buffer, and since the buffer will be + // recycled, it need to be copied. + value = append([]byte{}, data.Value()...) + } + } + data.Release() + return +} + +// Find finds key/value pair whose key is greater than or equal to the +// given key. It returns ErrNotFound if the table doesn't contain +// such pair. +// If filtered is true then the nearest 'block' will be checked against +// 'filter data' (if present) and will immediately return ErrNotFound if +// 'filter data' indicates that such pair doesn't exist. +// +// The caller may modify the contents of the returned slice as it is its +// own copy. +// It is safe to modify the contents of the argument after Find returns. +func (r *Reader) Find(key []byte, filtered bool, ro *opt.ReadOptions) (rkey, value []byte, err error) { + return r.find(key, filtered, ro, false) +} + +// FindKey finds key that is greater than or equal to the given key. +// It returns ErrNotFound if the table doesn't contain such key. +// If filtered is true then the nearest 'block' will be checked against +// 'filter data' (if present) and will immediately return ErrNotFound if +// 'filter data' indicates that such key doesn't exist. +// +// The caller may modify the contents of the returned slice as it is its +// own copy. +// It is safe to modify the contents of the argument after Find returns. 
+func (r *Reader) FindKey(key []byte, filtered bool, ro *opt.ReadOptions) (rkey []byte, err error) { + rkey, _, err = r.find(key, filtered, ro, true) + return +} + +// Get gets the value for the given key. It returns errors.ErrNotFound +// if the table does not contain the key. +// +// The caller may modify the contents of the returned slice as it is its +// own copy. +// It is safe to modify the contents of the argument after Find returns. +func (r *Reader) Get(key []byte, ro *opt.ReadOptions) (value []byte, err error) { + r.mu.RLock() + defer r.mu.RUnlock() + + if r.err != nil { + err = r.err + return + } + + rkey, value, err := r.find(key, false, ro, false) + if err == nil && r.cmp.Compare(rkey, key) != 0 { + value = nil + err = ErrNotFound + } + return +} + +// OffsetOf returns approximate offset for the given key. +// +// It is safe to modify the contents of the argument after Get returns. +func (r *Reader) OffsetOf(key []byte) (offset int64, err error) { + r.mu.RLock() + defer r.mu.RUnlock() + + if r.err != nil { + err = r.err + return + } + + indexBlock, rel, err := r.readBlockCached(r.indexBH, true, true) + if err != nil { + return + } + defer rel.Release() + + index := r.newBlockIter(indexBlock, nil, nil, true) + defer index.Release() + if index.Seek(key) { + dataBH, n := decodeBlockHandle(index.Value()) + if n == 0 { + r.err = r.newErrCorruptedBH(r.indexBH, "bad data block handle") + return + } + offset = int64(dataBH.offset) + return + } + err = index.Error() + if err == nil { + offset = r.dataEnd + } + return +} + +// Release implements util.Releaser. +// It also close the file if it is an io.Closer. 
func (r *Reader) Release() {
	r.mu.Lock()
	defer r.mu.Unlock()

	// Close the underlying reader when it also implements io.Closer.
	// The error returned by Close is discarded.
	if closer, ok := r.reader.(io.Closer); ok {
		closer.Close()
	}
	// Drop the locally held index and filter blocks, if any.
	if r.indexBlock != nil {
		r.indexBlock.Release()
		r.indexBlock = nil
	}
	if r.filterBlock != nil {
		r.filterBlock.Release()
		r.filterBlock = nil
	}
	r.reader = nil
	r.cache = nil
	r.bpool = nil
	// Methods that check r.err return ErrReaderReleased from now on.
	r.err = ErrReaderReleased
}

// NewReader creates a new initialized table reader for the file.
// The cache and bpool are optional and can be nil. (The original comment
// also lists "fi"; presumably that refers to the fd argument.)
//
// A nil f, or a read error that is not a corruption error, yields a nil
// reader and the error. When the table is found to be corrupted (too small,
// bad magic number, bad block handle, corrupted metaindex or index block)
// the reader is still returned with a nil error; the corruption is stored in
// the reader's err field, which its methods check before doing any work.
//
// The returned table reader instance is safe for concurrent use.
func NewReader(f io.ReaderAt, size int64, fd storage.FileDesc, cache *cache.NamespaceGetter, bpool *util.BufferPool, o *opt.Options) (*Reader, error) {
	if f == nil {
		return nil, errors.New("leveldb/table: nil file")
	}

	r := &Reader{
		fd:             fd,
		reader:         f,
		cache:          cache,
		bpool:          bpool,
		o:              o,
		cmp:            o.GetComparer(),
		verifyChecksum: o.GetStrict(opt.StrictBlockChecksum),
	}

	if size < footerLen {
		r.err = r.newErrCorrupted(0, size, "table", "too small")
		return r, nil
	}

	// The footer occupies the last footerLen bytes of the file and ends
	// with the magic string.
	footerPos := size - footerLen
	var footer [footerLen]byte
	if _, err := r.reader.ReadAt(footer[:], footerPos); err != nil && err != io.EOF {
		return nil, err
	}
	if string(footer[footerLen-len(magic):footerLen]) != magic {
		r.err = r.newErrCorrupted(footerPos, footerLen, "table-footer", "bad magic number")
		return r, nil
	}

	var n int
	// Decode the metaindex block handle.
	r.metaBH, n = decodeBlockHandle(footer[:])
	if n == 0 {
		r.err = r.newErrCorrupted(footerPos, footerLen, "table-footer", "bad metaindex block handle")
		return r, nil
	}

	// Decode the index block handle.
	// It starts right after the n bytes used by the metaindex handle.
	r.indexBH, n = decodeBlockHandle(footer[n:])
	if n == 0 {
		r.err = r.newErrCorrupted(footerPos, footerLen, "table-footer", "bad index block handle")
		return r, nil
	}

	// Read metaindex block.
	metaBlock, err := r.readBlock(r.metaBH, true)
	if err != nil {
		if errors.IsCorrupted(err) {
			r.err = err
			return r, nil
		}
		return nil, err
	}

	// Set data end.
	// Without a filter block the data section ends where the metaindex
	// block starts.
	r.dataEnd = int64(r.metaBH.offset)

	// Read metaindex.
	// Look for a "filter.<name>" entry whose name matches the configured
	// filter or one of the alternative filters.
	metaIter := r.newBlockIter(metaBlock, nil, nil, true)
	for metaIter.Next() {
		key := string(metaIter.Key())
		if !strings.HasPrefix(key, "filter.") {
			continue
		}
		// Strip the 7-byte "filter." prefix to get the filter name.
		fn := key[7:]
		if f0 := o.GetFilter(); f0 != nil && f0.Name() == fn {
			r.filter = f0
		} else {
			for _, f0 := range o.GetAltFilters() {
				if f0.Name() == fn {
					r.filter = f0
					break
				}
			}
		}
		if r.filter != nil {
			filterBH, n := decodeBlockHandle(metaIter.Value())
			if n == 0 {
				// Undecodable handle: keep scanning the metaindex.
				continue
			}
			r.filterBH = filterBH
			// Update data end.
			r.dataEnd = int64(filterBH.offset)
			break
		}
	}
	metaIter.Release()
	metaBlock.Release()

	// Cache index and filter block locally, since we don't have global cache.
	if cache == nil {
		r.indexBlock, err = r.readBlock(r.indexBH, true)
		if err != nil {
			if errors.IsCorrupted(err) {
				r.err = err
				return r, nil
			}
			return nil, err
		}
		if r.filter != nil {
			r.filterBlock, err = r.readFilterBlock(r.filterBH)
			if err != nil {
				if !errors.IsCorrupted(err) {
					return nil, err
				}

				// Don't use filter then.
				r.filter = nil
			}
		}
	}

	return r, nil
}
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/table/table.go b/vendor/github.com/syndtr/goleveldb/leveldb/table/table.go
new file mode 100644
index 0000000000..beacdc1f02
--- /dev/null
+++ b/vendor/github.com/syndtr/goleveldb/leveldb/table/table.go
@@ -0,0 +1,177 @@
// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Package table allows read and write sorted key/value.
+package table + +import ( + "encoding/binary" +) + +/* +Table: + +Table is consist of one or more data blocks, an optional filter block +a metaindex block, an index block and a table footer. Metaindex block +is a special block used to keep parameters of the table, such as filter +block name and its block handle. Index block is a special block used to +keep record of data blocks offset and length, index block use one as +restart interval. The key used by index block are the last key of preceding +block, shorter separator of adjacent blocks or shorter successor of the +last key of the last block. Filter block is an optional block contains +sequence of filter data generated by a filter generator. + +Table data structure: + + optional + / + +--------------+--------------+--------------+------+-------+-----------------+-------------+--------+ + | data block 1 | ... | data block n | filter block | metaindex block | index block | footer | + +--------------+--------------+--------------+--------------+-----------------+-------------+--------+ + + Each block followed by a 5-bytes trailer contains compression type and checksum. + +Table block trailer: + + +---------------------------+-------------------+ + | compression type (1-byte) | checksum (4-byte) | + +---------------------------+-------------------+ + + The checksum is a CRC-32 computed using Castagnoli's polynomial. Compression + type also included in the checksum. + +Table footer: + + +------------------- 40-bytes -------------------+ + / \ + +------------------------+--------------------+------+-----------------+ + | metaindex block handle / index block handle / ---- | magic (8-bytes) | + +------------------------+--------------------+------+-----------------+ + + The magic are first 64-bit of SHA-1 sum of "http://code.google.com/p/leveldb/". + +NOTE: All fixed-length integer are little-endian. +*/ + +/* +Block: + +Block is consist of one or more key/value entries and a block trailer. 
+Block entry shares key prefix with its preceding key until a restart +point reached. A block should contains at least one restart point. +First restart point are always zero. + +Block data structure: + + + restart point + restart point (depends on restart interval) + / / + +---------------+---------------+---------------+---------------+---------+ + | block entry 1 | block entry 2 | ... | block entry n | trailer | + +---------------+---------------+---------------+---------------+---------+ + +Key/value entry: + + +---- key len ----+ + / \ + +-------+---------+-----------+---------+--------------------+--------------+----------------+ + | shared (varint) | not shared (varint) | value len (varint) | key (varlen) | value (varlen) | + +-----------------+---------------------+--------------------+--------------+----------------+ + + Block entry shares key prefix with its preceding key: + Conditions: + restart_interval=2 + entry one : key=deck,value=v1 + entry two : key=dock,value=v2 + entry three: key=duck,value=v3 + The entries will be encoded as follow: + + + restart point (offset=0) + restart point (offset=16) + / / + +-----+-----+-----+----------+--------+-----+-----+-----+---------+--------+-----+-----+-----+----------+--------+ + | 0 | 4 | 2 | "deck" | "v1" | 1 | 3 | 2 | "ock" | "v2" | 0 | 4 | 2 | "duck" | "v3" | + +-----+-----+-----+----------+--------+-----+-----+-----+---------+--------+-----+-----+-----+----------+--------+ + \ / \ / \ / + +----------- entry one -----------+ +----------- entry two ----------+ +---------- entry three ----------+ + + The block trailer will contains two restart points: + + +------------+-----------+--------+ + | 0 | 16 | 2 | + +------------+-----------+---+----+ + \ / \ + +-- restart points --+ + restart points length + +Block trailer: + + +-- 4-bytes --+ + / \ + +-----------------+-----------------+-----------------+------------------------------+ + | restart point 1 | .... 
| restart point n | restart points len (4-bytes) | + +-----------------+-----------------+-----------------+------------------------------+ + + +NOTE: All fixed-length integer are little-endian. +*/ + +/* +Filter block: + +Filter block consist of one or more filter data and a filter block trailer. +The trailer contains filter data offsets, a trailer offset and a 1-byte base Lg. + +Filter block data structure: + + + offset 1 + offset 2 + offset n + trailer offset + / / / / + +---------------+---------------+---------------+---------+ + | filter data 1 | ... | filter data n | trailer | + +---------------+---------------+---------------+---------+ + +Filter block trailer: + + +- 4-bytes -+ + / \ + +---------------+---------------+---------------+-------------------------------+------------------+ + | data 1 offset | .... | data n offset | data-offsets offset (4-bytes) | base Lg (1-byte) | + +-------------- +---------------+---------------+-------------------------------+------------------+ + + +NOTE: All fixed-length integer are little-endian. +*/ + +const ( + blockTrailerLen = 5 + footerLen = 48 + + magic = "\x57\xfb\x80\x8b\x24\x75\x47\xdb" + + // The block type gives the per-block compression format. + // These constants are part of the file format and should not be changed. 
+ blockTypeNoCompression = 0 + blockTypeSnappyCompression = 1 + + // Generate new filter every 2KB of data + filterBaseLg = 11 + filterBase = 1 << filterBaseLg +) + +type blockHandle struct { + offset, length uint64 +} + +func decodeBlockHandle(src []byte) (blockHandle, int) { + offset, n := binary.Uvarint(src) + length, m := binary.Uvarint(src[n:]) + if n == 0 || m == 0 { + return blockHandle{}, 0 + } + return blockHandle{offset, length}, n + m +} + +func encodeBlockHandle(dst []byte, b blockHandle) int { + n := binary.PutUvarint(dst, b.offset) + m := binary.PutUvarint(dst[n:], b.length) + return n + m +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/table/writer.go b/vendor/github.com/syndtr/goleveldb/leveldb/table/writer.go new file mode 100644 index 0000000000..b96b271d8d --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/table/writer.go @@ -0,0 +1,375 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +package table + +import ( + "encoding/binary" + "errors" + "fmt" + "io" + + "github.com/golang/snappy" + + "github.com/syndtr/goleveldb/leveldb/comparer" + "github.com/syndtr/goleveldb/leveldb/filter" + "github.com/syndtr/goleveldb/leveldb/opt" + "github.com/syndtr/goleveldb/leveldb/util" +) + +func sharedPrefixLen(a, b []byte) int { + i, n := 0, len(a) + if n > len(b) { + n = len(b) + } + for i < n && a[i] == b[i] { + i++ + } + return i +} + +type blockWriter struct { + restartInterval int + buf util.Buffer + nEntries int + prevKey []byte + restarts []uint32 + scratch []byte +} + +func (w *blockWriter) append(key, value []byte) { + nShared := 0 + if w.nEntries%w.restartInterval == 0 { + w.restarts = append(w.restarts, uint32(w.buf.Len())) + } else { + nShared = sharedPrefixLen(w.prevKey, key) + } + n := binary.PutUvarint(w.scratch[0:], uint64(nShared)) + n += binary.PutUvarint(w.scratch[n:], uint64(len(key)-nShared)) + n += binary.PutUvarint(w.scratch[n:], uint64(len(value))) + w.buf.Write(w.scratch[:n]) + w.buf.Write(key[nShared:]) + w.buf.Write(value) + w.prevKey = append(w.prevKey[:0], key...) + w.nEntries++ +} + +func (w *blockWriter) finish() { + // Write restarts entry. + if w.nEntries == 0 { + // Must have at least one restart entry. 
+ w.restarts = append(w.restarts, 0) + } + w.restarts = append(w.restarts, uint32(len(w.restarts))) + for _, x := range w.restarts { + buf4 := w.buf.Alloc(4) + binary.LittleEndian.PutUint32(buf4, x) + } +} + +func (w *blockWriter) reset() { + w.buf.Reset() + w.nEntries = 0 + w.restarts = w.restarts[:0] +} + +func (w *blockWriter) bytesLen() int { + restartsLen := len(w.restarts) + if restartsLen == 0 { + restartsLen = 1 + } + return w.buf.Len() + 4*restartsLen + 4 +} + +type filterWriter struct { + generator filter.FilterGenerator + buf util.Buffer + nKeys int + offsets []uint32 +} + +func (w *filterWriter) add(key []byte) { + if w.generator == nil { + return + } + w.generator.Add(key) + w.nKeys++ +} + +func (w *filterWriter) flush(offset uint64) { + if w.generator == nil { + return + } + for x := int(offset / filterBase); x > len(w.offsets); { + w.generate() + } +} + +func (w *filterWriter) finish() { + if w.generator == nil { + return + } + // Generate last keys. + + if w.nKeys > 0 { + w.generate() + } + w.offsets = append(w.offsets, uint32(w.buf.Len())) + for _, x := range w.offsets { + buf4 := w.buf.Alloc(4) + binary.LittleEndian.PutUint32(buf4, x) + } + w.buf.WriteByte(filterBaseLg) +} + +func (w *filterWriter) generate() { + // Record offset. + w.offsets = append(w.offsets, uint32(w.buf.Len())) + // Generate filters. + if w.nKeys > 0 { + w.generator.Generate(&w.buf) + w.nKeys = 0 + } +} + +// Writer is a table writer. +type Writer struct { + writer io.Writer + err error + // Options + cmp comparer.Comparer + filter filter.Filter + compression opt.Compression + blockSize int + + dataBlock blockWriter + indexBlock blockWriter + filterBlock filterWriter + pendingBH blockHandle + offset uint64 + nEntries int + // Scratch allocated enough for 5 uvarint. Block writer should not use + // first 20-bytes since it will be used to encode block handle, which + // then passed to the block writer itself. 
+ scratch [50]byte + comparerScratch []byte + compressionScratch []byte +} + +func (w *Writer) writeBlock(buf *util.Buffer, compression opt.Compression) (bh blockHandle, err error) { + // Compress the buffer if necessary. + var b []byte + if compression == opt.SnappyCompression { + // Allocate scratch enough for compression and block trailer. + if n := snappy.MaxEncodedLen(buf.Len()) + blockTrailerLen; len(w.compressionScratch) < n { + w.compressionScratch = make([]byte, n) + } + compressed := snappy.Encode(w.compressionScratch, buf.Bytes()) + n := len(compressed) + b = compressed[:n+blockTrailerLen] + b[n] = blockTypeSnappyCompression + } else { + tmp := buf.Alloc(blockTrailerLen) + tmp[0] = blockTypeNoCompression + b = buf.Bytes() + } + + // Calculate the checksum. + n := len(b) - 4 + checksum := util.NewCRC(b[:n]).Value() + binary.LittleEndian.PutUint32(b[n:], checksum) + + // Write the buffer to the file. + _, err = w.writer.Write(b) + if err != nil { + return + } + bh = blockHandle{w.offset, uint64(len(b) - blockTrailerLen)} + w.offset += uint64(len(b)) + return +} + +func (w *Writer) flushPendingBH(key []byte) { + if w.pendingBH.length == 0 { + return + } + var separator []byte + if len(key) == 0 { + separator = w.cmp.Successor(w.comparerScratch[:0], w.dataBlock.prevKey) + } else { + separator = w.cmp.Separator(w.comparerScratch[:0], w.dataBlock.prevKey, key) + } + if separator == nil { + separator = w.dataBlock.prevKey + } else { + w.comparerScratch = separator + } + n := encodeBlockHandle(w.scratch[:20], w.pendingBH) + // Append the block handle to the index block. + w.indexBlock.append(separator, w.scratch[:n]) + // Reset prev key of the data block. + w.dataBlock.prevKey = w.dataBlock.prevKey[:0] + // Clear pending block handle. 
+ w.pendingBH = blockHandle{} +} + +func (w *Writer) finishBlock() error { + w.dataBlock.finish() + bh, err := w.writeBlock(&w.dataBlock.buf, w.compression) + if err != nil { + return err + } + w.pendingBH = bh + // Reset the data block. + w.dataBlock.reset() + // Flush the filter block. + w.filterBlock.flush(w.offset) + return nil +} + +// Append appends key/value pair to the table. The keys passed must +// be in increasing order. +// +// It is safe to modify the contents of the arguments after Append returns. +func (w *Writer) Append(key, value []byte) error { + if w.err != nil { + return w.err + } + if w.nEntries > 0 && w.cmp.Compare(w.dataBlock.prevKey, key) >= 0 { + w.err = fmt.Errorf("leveldb/table: Writer: keys are not in increasing order: %q, %q", w.dataBlock.prevKey, key) + return w.err + } + + w.flushPendingBH(key) + // Append key/value pair to the data block. + w.dataBlock.append(key, value) + // Add key to the filter block. + w.filterBlock.add(key) + + // Finish the data block if block size target reached. + if w.dataBlock.bytesLen() >= w.blockSize { + if err := w.finishBlock(); err != nil { + w.err = err + return w.err + } + } + w.nEntries++ + return nil +} + +// BlocksLen returns number of blocks written so far. +func (w *Writer) BlocksLen() int { + n := w.indexBlock.nEntries + if w.pendingBH.length > 0 { + // Includes the pending block. + n++ + } + return n +} + +// EntriesLen returns number of entries added so far. +func (w *Writer) EntriesLen() int { + return w.nEntries +} + +// BytesLen returns number of bytes written so far. +func (w *Writer) BytesLen() int { + return int(w.offset) +} + +// Close will finalize the table. Calling Append is not possible +// after Close, but calling BlocksLen, EntriesLen and BytesLen +// is still possible. +func (w *Writer) Close() error { + if w.err != nil { + return w.err + } + + // Write the last data block. Or empty data block if there + // aren't any data blocks at all. 
+ if w.dataBlock.nEntries > 0 || w.nEntries == 0 { + if err := w.finishBlock(); err != nil { + w.err = err + return w.err + } + } + w.flushPendingBH(nil) + + // Write the filter block. + var filterBH blockHandle + w.filterBlock.finish() + if buf := &w.filterBlock.buf; buf.Len() > 0 { + filterBH, w.err = w.writeBlock(buf, opt.NoCompression) + if w.err != nil { + return w.err + } + } + + // Write the metaindex block. + if filterBH.length > 0 { + key := []byte("filter." + w.filter.Name()) + n := encodeBlockHandle(w.scratch[:20], filterBH) + w.dataBlock.append(key, w.scratch[:n]) + } + w.dataBlock.finish() + metaindexBH, err := w.writeBlock(&w.dataBlock.buf, w.compression) + if err != nil { + w.err = err + return w.err + } + + // Write the index block. + w.indexBlock.finish() + indexBH, err := w.writeBlock(&w.indexBlock.buf, w.compression) + if err != nil { + w.err = err + return w.err + } + + // Write the table footer. + footer := w.scratch[:footerLen] + for i := range footer { + footer[i] = 0 + } + n := encodeBlockHandle(footer, metaindexBH) + encodeBlockHandle(footer[n:], indexBH) + copy(footer[footerLen-len(magic):], magic) + if _, err := w.writer.Write(footer); err != nil { + w.err = err + return w.err + } + w.offset += footerLen + + w.err = errors.New("leveldb/table: writer is closed") + return nil +} + +// NewWriter creates a new initialized table writer for the file. +// +// Table writer is not safe for concurrent use. +func NewWriter(f io.Writer, o *opt.Options) *Writer { + w := &Writer{ + writer: f, + cmp: o.GetComparer(), + filter: o.GetFilter(), + compression: o.GetCompression(), + blockSize: o.GetBlockSize(), + comparerScratch: make([]byte, 0), + } + // data block + w.dataBlock.restartInterval = o.GetBlockRestartInterval() + // The first 20-bytes are used for encoding block handle. 
+ w.dataBlock.scratch = w.scratch[20:] + // index block + w.indexBlock.restartInterval = 1 + w.indexBlock.scratch = w.scratch[20:] + // filter block + if w.filter != nil { + w.filterBlock.generator = w.filter.NewGenerator() + w.filterBlock.flush(0) + } + return w +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/util.go b/vendor/github.com/syndtr/goleveldb/leveldb/util.go new file mode 100644 index 0000000000..0e2b519e5c --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/util.go @@ -0,0 +1,98 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "fmt" + "sort" + + "github.com/syndtr/goleveldb/leveldb/storage" +) + +func shorten(str string) string { + if len(str) <= 8 { + return str + } + return str[:3] + ".." + str[len(str)-3:] +} + +var bunits = [...]string{"", "Ki", "Mi", "Gi", "Ti"} + +func shortenb(bytes int) string { + i := 0 + for ; bytes > 1024 && i < 4; i++ { + bytes /= 1024 + } + return fmt.Sprintf("%d%sB", bytes, bunits[i]) +} + +func sshortenb(bytes int) string { + if bytes == 0 { + return "~" + } + sign := "+" + if bytes < 0 { + sign = "-" + bytes *= -1 + } + i := 0 + for ; bytes > 1024 && i < 4; i++ { + bytes /= 1024 + } + return fmt.Sprintf("%s%d%sB", sign, bytes, bunits[i]) +} + +func sint(x int) string { + if x == 0 { + return "~" + } + sign := "+" + if x < 0 { + sign = "-" + x *= -1 + } + return fmt.Sprintf("%s%d", sign, x) +} + +func minInt(a, b int) int { + if a < b { + return a + } + return b +} + +func maxInt(a, b int) int { + if a > b { + return a + } + return b +} + +type fdSorter []storage.FileDesc + +func (p fdSorter) Len() int { + return len(p) +} + +func (p fdSorter) Less(i, j int) bool { + return p[i].Num < p[j].Num +} + +func (p fdSorter) Swap(i, j int) { + p[i], p[j] = p[j], p[i] +} + +func sortFds(fds []storage.FileDesc) { + 
sort.Sort(fdSorter(fds)) +} + +func ensureBuffer(b []byte, n int) []byte { + if cap(b) < n { + return make([]byte, n) + } + return b[:n] +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/util/buffer.go b/vendor/github.com/syndtr/goleveldb/leveldb/util/buffer.go new file mode 100644 index 0000000000..21de242552 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/util/buffer.go @@ -0,0 +1,293 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package util + +// This a copy of Go std bytes.Buffer with some modification +// and some features stripped. + +import ( + "bytes" + "io" +) + +// A Buffer is a variable-sized buffer of bytes with Read and Write methods. +// The zero value for Buffer is an empty buffer ready to use. +type Buffer struct { + buf []byte // contents are the bytes buf[off : len(buf)] + off int // read at &buf[off], write at &buf[len(buf)] + bootstrap [64]byte // memory to hold first slice; helps small buffers (Printf) avoid allocation. +} + +// Bytes returns a slice of the contents of the unread portion of the buffer; +// len(b.Bytes()) == b.Len(). If the caller changes the contents of the +// returned slice, the contents of the buffer will change provided there +// are no intervening method calls on the Buffer. +func (b *Buffer) Bytes() []byte { return b.buf[b.off:] } + +// String returns the contents of the unread portion of the buffer +// as a string. If the Buffer is a nil pointer, it returns "<nil>". +func (b *Buffer) String() string { + if b == nil { + // Special case, useful in debugging. + return "<nil>" + } + return string(b.buf[b.off:]) +} + +// Len returns the number of bytes of the unread portion of the buffer; +// b.Len() == len(b.Bytes()). +func (b *Buffer) Len() int { return len(b.buf) - b.off } + +// Truncate discards all but the first n unread bytes from the buffer. 
+// It panics if n is negative or greater than the length of the buffer. +func (b *Buffer) Truncate(n int) { + switch { + case n < 0 || n > b.Len(): + panic("leveldb/util.Buffer: truncation out of range") + case n == 0: + // Reuse buffer space. + b.off = 0 + } + b.buf = b.buf[0 : b.off+n] +} + +// Reset resets the buffer so it has no content. +// b.Reset() is the same as b.Truncate(0). +func (b *Buffer) Reset() { b.Truncate(0) } + +// grow grows the buffer to guarantee space for n more bytes. +// It returns the index where bytes should be written. +// If the buffer can't grow it will panic with bytes.ErrTooLarge. +func (b *Buffer) grow(n int) int { + m := b.Len() + // If buffer is empty, reset to recover space. + if m == 0 && b.off != 0 { + b.Truncate(0) + } + if len(b.buf)+n > cap(b.buf) { + var buf []byte + if b.buf == nil && n <= len(b.bootstrap) { + buf = b.bootstrap[0:] + } else if m+n <= cap(b.buf)/2 { + // We can slide things down instead of allocating a new + // slice. We only need m+n <= cap(b.buf) to slide, but + // we instead let capacity get twice as large so we + // don't spend all our time copying. + copy(b.buf[:], b.buf[b.off:]) + buf = b.buf[:m] + } else { + // not enough space anywhere + buf = makeSlice(2*cap(b.buf) + n) + copy(buf, b.buf[b.off:]) + } + b.buf = buf + b.off = 0 + } + b.buf = b.buf[0 : b.off+m+n] + return b.off + m +} + +// Alloc allocs n bytes of slice from the buffer, growing the buffer as +// needed. If n is negative, Alloc will panic. +// If the buffer can't grow it will panic with bytes.ErrTooLarge. +func (b *Buffer) Alloc(n int) []byte { + if n < 0 { + panic("leveldb/util.Buffer.Alloc: negative count") + } + m := b.grow(n) + return b.buf[m:] +} + +// Grow grows the buffer's capacity, if necessary, to guarantee space for +// another n bytes. After Grow(n), at least n bytes can be written to the +// buffer without another allocation. +// If n is negative, Grow will panic. 
+// If the buffer can't grow it will panic with bytes.ErrTooLarge. +func (b *Buffer) Grow(n int) { + if n < 0 { + panic("leveldb/util.Buffer.Grow: negative count") + } + m := b.grow(n) + b.buf = b.buf[0:m] +} + +// Write appends the contents of p to the buffer, growing the buffer as +// needed. The return value n is the length of p; err is always nil. If the +// buffer becomes too large, Write will panic with bytes.ErrTooLarge. +func (b *Buffer) Write(p []byte) (n int, err error) { + m := b.grow(len(p)) + return copy(b.buf[m:], p), nil +} + +// MinRead is the minimum slice size passed to a Read call by +// Buffer.ReadFrom. As long as the Buffer has at least MinRead bytes beyond +// what is required to hold the contents of r, ReadFrom will not grow the +// underlying buffer. +const MinRead = 512 + +// ReadFrom reads data from r until EOF and appends it to the buffer, growing +// the buffer as needed. The return value n is the number of bytes read. Any +// error except io.EOF encountered during the read is also returned. If the +// buffer becomes too large, ReadFrom will panic with bytes.ErrTooLarge. +func (b *Buffer) ReadFrom(r io.Reader) (n int64, err error) { + // If buffer is empty, reset to recover space. + if b.off >= len(b.buf) { + b.Truncate(0) + } + for { + if free := cap(b.buf) - len(b.buf); free < MinRead { + // not enough space at end + newBuf := b.buf + if b.off+free < MinRead { + // not enough space using beginning of buffer; + // double buffer capacity + newBuf = makeSlice(2*cap(b.buf) + MinRead) + } + copy(newBuf, b.buf[b.off:]) + b.buf = newBuf[:len(b.buf)-b.off] + b.off = 0 + } + m, e := r.Read(b.buf[len(b.buf):cap(b.buf)]) + b.buf = b.buf[0 : len(b.buf)+m] + n += int64(m) + if e == io.EOF { + break + } + if e != nil { + return n, e + } + } + return n, nil // err is EOF, so return nil explicitly +} + +// makeSlice allocates a slice of size n. If the allocation fails, it panics +// with bytes.ErrTooLarge. 
+func makeSlice(n int) []byte { + // If the make fails, give a known error. + defer func() { + if recover() != nil { + panic(bytes.ErrTooLarge) + } + }() + return make([]byte, n) +} + +// WriteTo writes data to w until the buffer is drained or an error occurs. +// The return value n is the number of bytes written; it always fits into an +// int, but it is int64 to match the io.WriterTo interface. Any error +// encountered during the write is also returned. +func (b *Buffer) WriteTo(w io.Writer) (n int64, err error) { + if b.off < len(b.buf) { + nBytes := b.Len() + m, e := w.Write(b.buf[b.off:]) + if m > nBytes { + panic("leveldb/util.Buffer.WriteTo: invalid Write count") + } + b.off += m + n = int64(m) + if e != nil { + return n, e + } + // all bytes should have been written, by definition of + // Write method in io.Writer + if m != nBytes { + return n, io.ErrShortWrite + } + } + // Buffer is now empty; reset. + b.Truncate(0) + return +} + +// WriteByte appends the byte c to the buffer, growing the buffer as needed. +// The returned error is always nil, but is included to match bufio.Writer's +// WriteByte. If the buffer becomes too large, WriteByte will panic with +// bytes.ErrTooLarge. +func (b *Buffer) WriteByte(c byte) error { + m := b.grow(1) + b.buf[m] = c + return nil +} + +// Read reads the next len(p) bytes from the buffer or until the buffer +// is drained. The return value n is the number of bytes read. If the +// buffer has no data to return, err is io.EOF (unless len(p) is zero); +// otherwise it is nil. +func (b *Buffer) Read(p []byte) (n int, err error) { + if b.off >= len(b.buf) { + // Buffer is empty, reset to recover space. + b.Truncate(0) + if len(p) == 0 { + return + } + return 0, io.EOF + } + n = copy(p, b.buf[b.off:]) + b.off += n + return +} + +// Next returns a slice containing the next n bytes from the buffer, +// advancing the buffer as if the bytes had been returned by Read. 
+// If there are fewer than n bytes in the buffer, Next returns the entire buffer. +// The slice is only valid until the next call to a read or write method. +func (b *Buffer) Next(n int) []byte { + m := b.Len() + if n > m { + n = m + } + data := b.buf[b.off : b.off+n] + b.off += n + return data +} + +// ReadByte reads and returns the next byte from the buffer. +// If no byte is available, it returns error io.EOF. +func (b *Buffer) ReadByte() (c byte, err error) { + if b.off >= len(b.buf) { + // Buffer is empty, reset to recover space. + b.Truncate(0) + return 0, io.EOF + } + c = b.buf[b.off] + b.off++ + return c, nil +} + +// ReadBytes reads until the first occurrence of delim in the input, +// returning a slice containing the data up to and including the delimiter. +// If ReadBytes encounters an error before finding a delimiter, +// it returns the data read before the error and the error itself (often io.EOF). +// ReadBytes returns err != nil if and only if the returned data does not end in +// delim. +func (b *Buffer) ReadBytes(delim byte) (line []byte, err error) { + slice, err := b.readSlice(delim) + // return a copy of slice. The buffer's backing array may + // be overwritten by later calls. + line = append(line, slice...) + return +} + +// readSlice is like ReadBytes but returns a reference to internal buffer data. +func (b *Buffer) readSlice(delim byte) (line []byte, err error) { + i := bytes.IndexByte(b.buf[b.off:], delim) + end := b.off + i + 1 + if i < 0 { + end = len(b.buf) + err = io.EOF + } + line = b.buf[b.off:end] + b.off = end + return line, err +} + +// NewBuffer creates and initializes a new Buffer using buf as its initial +// contents. It is intended to prepare a Buffer to read existing data. It +// can also be used to size the internal buffer for writing. To do that, +// buf should have the desired capacity but a length of zero. +// +// In most cases, new(Buffer) (or just declaring a Buffer variable) is +// sufficient to initialize a Buffer. 
+func NewBuffer(buf []byte) *Buffer { return &Buffer{buf: buf} } diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/util/buffer_pool.go b/vendor/github.com/syndtr/goleveldb/leveldb/util/buffer_pool.go new file mode 100644 index 0000000000..2f3db974a7 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/util/buffer_pool.go @@ -0,0 +1,239 @@ +// Copyright (c) 2014, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package util + +import ( + "fmt" + "sync" + "sync/atomic" + "time" +) + +type buffer struct { + b []byte + miss int +} + +// BufferPool is a 'buffer pool'. +type BufferPool struct { + pool [6]chan []byte + size [5]uint32 + sizeMiss [5]uint32 + sizeHalf [5]uint32 + baseline [4]int + baseline0 int + + mu sync.RWMutex + closed bool + closeC chan struct{} + + get uint32 + put uint32 + half uint32 + less uint32 + equal uint32 + greater uint32 + miss uint32 +} + +func (p *BufferPool) poolNum(n int) int { + if n <= p.baseline0 && n > p.baseline0/2 { + return 0 + } + for i, x := range p.baseline { + if n <= x { + return i + 1 + } + } + return len(p.baseline) + 1 +} + +// Get returns buffer with length of n. +func (p *BufferPool) Get(n int) []byte { + if p == nil { + return make([]byte, n) + } + + p.mu.RLock() + defer p.mu.RUnlock() + + if p.closed { + return make([]byte, n) + } + + atomic.AddUint32(&p.get, 1) + + poolNum := p.poolNum(n) + pool := p.pool[poolNum] + if poolNum == 0 { + // Fast path. 
+ select { + case b := <-pool: + switch { + case cap(b) > n: + if cap(b)-n >= n { + atomic.AddUint32(&p.half, 1) + select { + case pool <- b: + default: + } + return make([]byte, n) + } else { + atomic.AddUint32(&p.less, 1) + return b[:n] + } + case cap(b) == n: + atomic.AddUint32(&p.equal, 1) + return b[:n] + default: + atomic.AddUint32(&p.greater, 1) + } + default: + atomic.AddUint32(&p.miss, 1) + } + + return make([]byte, n, p.baseline0) + } else { + sizePtr := &p.size[poolNum-1] + + select { + case b := <-pool: + switch { + case cap(b) > n: + if cap(b)-n >= n { + atomic.AddUint32(&p.half, 1) + sizeHalfPtr := &p.sizeHalf[poolNum-1] + if atomic.AddUint32(sizeHalfPtr, 1) == 20 { + atomic.StoreUint32(sizePtr, uint32(cap(b)/2)) + atomic.StoreUint32(sizeHalfPtr, 0) + } else { + select { + case pool <- b: + default: + } + } + return make([]byte, n) + } else { + atomic.AddUint32(&p.less, 1) + return b[:n] + } + case cap(b) == n: + atomic.AddUint32(&p.equal, 1) + return b[:n] + default: + atomic.AddUint32(&p.greater, 1) + if uint32(cap(b)) >= atomic.LoadUint32(sizePtr) { + select { + case pool <- b: + default: + } + } + } + default: + atomic.AddUint32(&p.miss, 1) + } + + if size := atomic.LoadUint32(sizePtr); uint32(n) > size { + if size == 0 { + atomic.CompareAndSwapUint32(sizePtr, 0, uint32(n)) + } else { + sizeMissPtr := &p.sizeMiss[poolNum-1] + if atomic.AddUint32(sizeMissPtr, 1) == 20 { + atomic.StoreUint32(sizePtr, uint32(n)) + atomic.StoreUint32(sizeMissPtr, 0) + } + } + return make([]byte, n) + } else { + return make([]byte, n, size) + } + } +} + +// Put adds given buffer to the pool. 
+func (p *BufferPool) Put(b []byte) { + if p == nil { + return + } + + p.mu.RLock() + defer p.mu.RUnlock() + + if p.closed { + return + } + + atomic.AddUint32(&p.put, 1) + + pool := p.pool[p.poolNum(cap(b))] + select { + case pool <- b: + default: + } + +} + +func (p *BufferPool) Close() { + if p == nil { + return + } + + p.mu.Lock() + if !p.closed { + p.closed = true + p.closeC <- struct{}{} + } + p.mu.Unlock() +} + +func (p *BufferPool) String() string { + if p == nil { + return "<nil>" + } + + return fmt.Sprintf("BufferPool{B·%d Z·%v Zm·%v Zh·%v G·%d P·%d H·%d <·%d =·%d >·%d M·%d}", + p.baseline0, p.size, p.sizeMiss, p.sizeHalf, p.get, p.put, p.half, p.less, p.equal, p.greater, p.miss) +} + +func (p *BufferPool) drain() { + ticker := time.NewTicker(2 * time.Second) + defer ticker.Stop() + for { + select { + case <-ticker.C: + for _, ch := range p.pool { + select { + case <-ch: + default: + } + } + case <-p.closeC: + close(p.closeC) + for _, ch := range p.pool { + close(ch) + } + return + } + } +} + +// NewBufferPool creates a new initialized 'buffer pool'. +func NewBufferPool(baseline int) *BufferPool { + if baseline <= 0 { + panic("baseline can't be <= 0") + } + p := &BufferPool{ + baseline0: baseline, + baseline: [...]int{baseline / 4, baseline / 2, baseline * 2, baseline * 4}, + closeC: make(chan struct{}, 1), + } + for i, cap := range []int{2, 2, 4, 4, 2, 1} { + p.pool[i] = make(chan []byte, cap) + } + go p.drain() + return p +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/util/crc32.go b/vendor/github.com/syndtr/goleveldb/leveldb/util/crc32.go new file mode 100644 index 0000000000..631c9d6109 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/util/crc32.go @@ -0,0 +1,30 @@ +// Copyright 2011 The LevelDB-Go Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +package util + +import ( + "hash/crc32" +) + +var table = crc32.MakeTable(crc32.Castagnoli) + +// CRC is a CRC-32 checksum computed using Castagnoli's polynomial. +type CRC uint32 + +// NewCRC creates a new crc based on the given bytes. +func NewCRC(b []byte) CRC { + return CRC(0).Update(b) +} + +// Update updates the crc with the given bytes. +func (c CRC) Update(b []byte) CRC { + return CRC(crc32.Update(uint32(c), table, b)) +} + +// Value returns a masked crc. +func (c CRC) Value() uint32 { + return uint32(c>>15|c<<17) + 0xa282ead8 +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/util/hash.go b/vendor/github.com/syndtr/goleveldb/leveldb/util/hash.go new file mode 100644 index 0000000000..7f3fa4e2c7 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/util/hash.go @@ -0,0 +1,48 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package util + +import ( + "encoding/binary" +) + +// Hash return hash of the given data. 
+func Hash(data []byte, seed uint32) uint32 { + // Similar to murmur hash + const ( + m = uint32(0xc6a4a793) + r = uint32(24) + ) + var ( + h = seed ^ (uint32(len(data)) * m) + i int + ) + + for n := len(data) - len(data)%4; i < n; i += 4 { + h += binary.LittleEndian.Uint32(data[i:]) + h *= m + h ^= (h >> 16) + } + + switch len(data) - i { + default: + panic("not reached") + case 3: + h += uint32(data[i+2]) << 16 + fallthrough + case 2: + h += uint32(data[i+1]) << 8 + fallthrough + case 1: + h += uint32(data[i]) + h *= m + h ^= (h >> r) + case 0: + } + + return h +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/util/range.go b/vendor/github.com/syndtr/goleveldb/leveldb/util/range.go new file mode 100644 index 0000000000..85159583d2 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/util/range.go @@ -0,0 +1,32 @@ +// Copyright (c) 2014, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package util + +// Range is a key range. +type Range struct { + // Start of the key range, include in the range. + Start []byte + + // Limit of the key range, not include in the range. + Limit []byte +} + +// BytesPrefix returns key range that satisfy the given prefix. +// This only applicable for the standard 'bytes comparer'. +func BytesPrefix(prefix []byte) *Range { + var limit []byte + for i := len(prefix) - 1; i >= 0; i-- { + c := prefix[i] + if c < 0xff { + limit = make([]byte, i+1) + copy(limit, prefix) + limit[i] = c + 1 + break + } + } + return &Range{prefix, limit} +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/util/util.go b/vendor/github.com/syndtr/goleveldb/leveldb/util/util.go new file mode 100644 index 0000000000..80614afc58 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/util/util.go @@ -0,0 +1,73 @@ +// Copyright (c) 2013, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. 
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Package util provides utilities used throughout leveldb.
package util

import (
	"errors"
)

// Errors used by the releaser helpers in this package.
var (
	// ErrReleased reports that the resource has already been released.
	// NOTE(review): the message text misspells "released"; it is left
	// unchanged here because it is visible at runtime.
	ErrReleased = errors.New("leveldb: resource already relesed")
	// ErrHasReleaser reports that a releaser has already been set.
	ErrHasReleaser = errors.New("leveldb: releaser already defined")
)

// Releaser is the interface that wraps the basic Release method.
type Releaser interface {
	// Release releases associated resources. Release should always succeed
	// and can be called multiple times without causing an error.
	Release()
}

// ReleaseSetter is the interface that wraps the basic SetReleaser method.
type ReleaseSetter interface {
	// SetReleaser associates the given releaser with the resources. The
	// releaser will be called once the corresponding resources are released.
	// Calling SetReleaser with nil will clear the releaser.
	//
	// This will panic if a releaser is already present or the corresponding
	// resource is already released. The releaser should be cleared first
	// before a new one is assigned.
	SetReleaser(releaser Releaser)
}

// BasicReleaser provides a basic implementation of Releaser and ReleaseSetter.
type BasicReleaser struct {
	releaser Releaser // optional releaser invoked by Release; may be nil
	released bool     // set once Release has run
}

// Released returns whether the Release method has already been called.
func (r *BasicReleaser) Released() bool {
	return r.released
}

// Release implements Releaser.Release. The first call invokes the attached
// releaser, if any, drops the reference to it and marks the receiver as
// released; subsequent calls do nothing.
func (r *BasicReleaser) Release() {
	if !r.released {
		if r.releaser != nil {
			r.releaser.Release()
			r.releaser = nil
		}
		r.released = true
	}
}

// SetReleaser implements ReleaseSetter.SetReleaser.
+func (r *BasicReleaser) SetReleaser(releaser Releaser) { + if r.released { + panic(ErrReleased) + } + if r.releaser != nil && releaser != nil { + panic(ErrHasReleaser) + } + r.releaser = releaser +} + +type NoopReleaser struct{} + +func (NoopReleaser) Release() {} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/version.go b/vendor/github.com/syndtr/goleveldb/leveldb/version.go new file mode 100644 index 0000000000..73f272af5f --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/version.go @@ -0,0 +1,528 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "fmt" + "sync/atomic" + "unsafe" + + "github.com/syndtr/goleveldb/leveldb/iterator" + "github.com/syndtr/goleveldb/leveldb/opt" + "github.com/syndtr/goleveldb/leveldb/util" +) + +type tSet struct { + level int + table *tFile +} + +type version struct { + s *session + + levels []tFiles + + // Level that should be compacted next and its compaction score. + // Score < 1 means compaction is not strictly needed. These fields + // are initialized by computeCompaction() + cLevel int + cScore float64 + + cSeek unsafe.Pointer + + closing bool + ref int + released bool +} + +func newVersion(s *session) *version { + return &version{s: s} +} + +func (v *version) incref() { + if v.released { + panic("already released") + } + + v.ref++ + if v.ref == 1 { + // Incr file ref. 
+ for _, tt := range v.levels { + for _, t := range tt { + v.s.addFileRef(t.fd, 1) + } + } + } +} + +func (v *version) releaseNB() { + v.ref-- + if v.ref > 0 { + return + } else if v.ref < 0 { + panic("negative version ref") + } + + for _, tt := range v.levels { + for _, t := range tt { + if v.s.addFileRef(t.fd, -1) == 0 { + v.s.tops.remove(t) + } + } + } + + v.released = true +} + +func (v *version) release() { + v.s.vmu.Lock() + v.releaseNB() + v.s.vmu.Unlock() +} + +func (v *version) walkOverlapping(aux tFiles, ikey internalKey, f func(level int, t *tFile) bool, lf func(level int) bool) { + ukey := ikey.ukey() + + // Aux level. + if aux != nil { + for _, t := range aux { + if t.overlaps(v.s.icmp, ukey, ukey) { + if !f(-1, t) { + return + } + } + } + + if lf != nil && !lf(-1) { + return + } + } + + // Walk tables level-by-level. + for level, tables := range v.levels { + if len(tables) == 0 { + continue + } + + if level == 0 { + // Level-0 files may overlap each other. Find all files that + // overlap ukey. + for _, t := range tables { + if t.overlaps(v.s.icmp, ukey, ukey) { + if !f(level, t) { + return + } + } + } + } else { + if i := tables.searchMax(v.s.icmp, ikey); i < len(tables) { + t := tables[i] + if v.s.icmp.uCompare(ukey, t.imin.ukey()) >= 0 { + if !f(level, t) { + return + } + } + } + } + + if lf != nil && !lf(level) { + return + } + } +} + +func (v *version) get(aux tFiles, ikey internalKey, ro *opt.ReadOptions, noValue bool) (value []byte, tcomp bool, err error) { + if v.closing { + return nil, false, ErrClosed + } + + ukey := ikey.ukey() + + var ( + tset *tSet + tseek bool + + // Level-0. + zfound bool + zseq uint64 + zkt keyType + zval []byte + ) + + err = ErrNotFound + + // Since entries never hop across level, finding key/value + // in smaller level make later levels irrelevant. 
+ v.walkOverlapping(aux, ikey, func(level int, t *tFile) bool { + if level >= 0 && !tseek { + if tset == nil { + tset = &tSet{level, t} + } else { + tseek = true + } + } + + var ( + fikey, fval []byte + ferr error + ) + if noValue { + fikey, ferr = v.s.tops.findKey(t, ikey, ro) + } else { + fikey, fval, ferr = v.s.tops.find(t, ikey, ro) + } + + switch ferr { + case nil: + case ErrNotFound: + return true + default: + err = ferr + return false + } + + if fukey, fseq, fkt, fkerr := parseInternalKey(fikey); fkerr == nil { + if v.s.icmp.uCompare(ukey, fukey) == 0 { + // Level <= 0 may overlaps each-other. + if level <= 0 { + if fseq >= zseq { + zfound = true + zseq = fseq + zkt = fkt + zval = fval + } + } else { + switch fkt { + case keyTypeVal: + value = fval + err = nil + case keyTypeDel: + default: + panic("leveldb: invalid internalKey type") + } + return false + } + } + } else { + err = fkerr + return false + } + + return true + }, func(level int) bool { + if zfound { + switch zkt { + case keyTypeVal: + value = zval + err = nil + case keyTypeDel: + default: + panic("leveldb: invalid internalKey type") + } + return false + } + + return true + }) + + if tseek && tset.table.consumeSeek() <= 0 { + tcomp = atomic.CompareAndSwapPointer(&v.cSeek, nil, unsafe.Pointer(tset)) + } + + return +} + +func (v *version) sampleSeek(ikey internalKey) (tcomp bool) { + var tset *tSet + + v.walkOverlapping(nil, ikey, func(level int, t *tFile) bool { + if tset == nil { + tset = &tSet{level, t} + return true + } + if tset.table.consumeSeek() <= 0 { + tcomp = atomic.CompareAndSwapPointer(&v.cSeek, nil, unsafe.Pointer(tset)) + } + return false + }, nil) + + return +} + +func (v *version) getIterators(slice *util.Range, ro *opt.ReadOptions) (its []iterator.Iterator) { + strict := opt.GetStrict(v.s.o.Options, ro, opt.StrictReader) + for level, tables := range v.levels { + if level == 0 { + // Merge all level zero files together since they may overlap. 
+ for _, t := range tables { + its = append(its, v.s.tops.newIterator(t, slice, ro)) + } + } else if len(tables) != 0 { + its = append(its, iterator.NewIndexedIterator(tables.newIndexIterator(v.s.tops, v.s.icmp, slice, ro), strict)) + } + } + return +} + +func (v *version) newStaging() *versionStaging { + return &versionStaging{base: v} +} + +// Spawn a new version based on this version. +func (v *version) spawn(r *sessionRecord) *version { + staging := v.newStaging() + staging.commit(r) + return staging.finish() +} + +func (v *version) fillRecord(r *sessionRecord) { + for level, tables := range v.levels { + for _, t := range tables { + r.addTableFile(level, t) + } + } +} + +func (v *version) tLen(level int) int { + if level < len(v.levels) { + return len(v.levels[level]) + } + return 0 +} + +func (v *version) offsetOf(ikey internalKey) (n int64, err error) { + for level, tables := range v.levels { + for _, t := range tables { + if v.s.icmp.Compare(t.imax, ikey) <= 0 { + // Entire file is before "ikey", so just add the file size + n += t.size + } else if v.s.icmp.Compare(t.imin, ikey) > 0 { + // Entire file is after "ikey", so ignore + if level > 0 { + // Files other than level 0 are sorted by meta->min, so + // no further files in this level will contain data for + // "ikey". + break + } + } else { + // "ikey" falls in the range for this table. Add the + // approximate offset of "ikey" within the table. 
+ if m, err := v.s.tops.offsetOf(t, ikey); err == nil { + n += m + } else { + return 0, err + } + } + } + } + + return +} + +func (v *version) pickMemdbLevel(umin, umax []byte, maxLevel int) (level int) { + if maxLevel > 0 { + if len(v.levels) == 0 { + return maxLevel + } + if !v.levels[0].overlaps(v.s.icmp, umin, umax, true) { + var overlaps tFiles + for ; level < maxLevel; level++ { + if pLevel := level + 1; pLevel >= len(v.levels) { + return maxLevel + } else if v.levels[pLevel].overlaps(v.s.icmp, umin, umax, false) { + break + } + if gpLevel := level + 2; gpLevel < len(v.levels) { + overlaps = v.levels[gpLevel].getOverlaps(overlaps, v.s.icmp, umin, umax, false) + if overlaps.size() > int64(v.s.o.GetCompactionGPOverlaps(level)) { + break + } + } + } + } + } + return +} + +func (v *version) computeCompaction() { + // Precomputed best level for next compaction + bestLevel := int(-1) + bestScore := float64(-1) + + statFiles := make([]int, len(v.levels)) + statSizes := make([]string, len(v.levels)) + statScore := make([]string, len(v.levels)) + statTotSize := int64(0) + + for level, tables := range v.levels { + var score float64 + size := tables.size() + if level == 0 { + // We treat level-0 specially by bounding the number of files + // instead of number of bytes for two reasons: + // + // (1) With larger write-buffer sizes, it is nice not to do too + // many level-0 compaction. + // + // (2) The files in level-0 are merged on every read and + // therefore we wish to avoid too many files when the individual + // file size is small (perhaps because of a small write-buffer + // setting, or very high compression ratios, or lots of + // overwrites/deletions). 
+ score = float64(len(tables)) / float64(v.s.o.GetCompactionL0Trigger()) + } else { + score = float64(size) / float64(v.s.o.GetCompactionTotalSize(level)) + } + + if score > bestScore { + bestLevel = level + bestScore = score + } + + statFiles[level] = len(tables) + statSizes[level] = shortenb(int(size)) + statScore[level] = fmt.Sprintf("%.2f", score) + statTotSize += size + } + + v.cLevel = bestLevel + v.cScore = bestScore + + v.s.logf("version@stat F·%v S·%s%v Sc·%v", statFiles, shortenb(int(statTotSize)), statSizes, statScore) +} + +func (v *version) needCompaction() bool { + return v.cScore >= 1 || atomic.LoadPointer(&v.cSeek) != nil +} + +type tablesScratch struct { + added map[int64]atRecord + deleted map[int64]struct{} +} + +type versionStaging struct { + base *version + levels []tablesScratch +} + +func (p *versionStaging) getScratch(level int) *tablesScratch { + if level >= len(p.levels) { + newLevels := make([]tablesScratch, level+1) + copy(newLevels, p.levels) + p.levels = newLevels + } + return &(p.levels[level]) +} + +func (p *versionStaging) commit(r *sessionRecord) { + // Deleted tables. + for _, r := range r.deletedTables { + scratch := p.getScratch(r.level) + if r.level < len(p.base.levels) && len(p.base.levels[r.level]) > 0 { + if scratch.deleted == nil { + scratch.deleted = make(map[int64]struct{}) + } + scratch.deleted[r.num] = struct{}{} + } + if scratch.added != nil { + delete(scratch.added, r.num) + } + } + + // New tables. + for _, r := range r.addedTables { + scratch := p.getScratch(r.level) + if scratch.added == nil { + scratch.added = make(map[int64]atRecord) + } + scratch.added[r.num] = r + if scratch.deleted != nil { + delete(scratch.deleted, r.num) + } + } +} + +func (p *versionStaging) finish() *version { + // Build new version. 
+ nv := newVersion(p.base.s) + numLevel := len(p.levels) + if len(p.base.levels) > numLevel { + numLevel = len(p.base.levels) + } + nv.levels = make([]tFiles, numLevel) + for level := 0; level < numLevel; level++ { + var baseTabels tFiles + if level < len(p.base.levels) { + baseTabels = p.base.levels[level] + } + + if level < len(p.levels) { + scratch := p.levels[level] + + var nt tFiles + // Prealloc list if possible. + if n := len(baseTabels) + len(scratch.added) - len(scratch.deleted); n > 0 { + nt = make(tFiles, 0, n) + } + + // Base tables. + for _, t := range baseTabels { + if _, ok := scratch.deleted[t.fd.Num]; ok { + continue + } + if _, ok := scratch.added[t.fd.Num]; ok { + continue + } + nt = append(nt, t) + } + + // New tables. + for _, r := range scratch.added { + nt = append(nt, tableFileFromRecord(r)) + } + + if len(nt) != 0 { + // Sort tables. + if level == 0 { + nt.sortByNum() + } else { + nt.sortByKey(p.base.s.icmp) + } + + nv.levels[level] = nt + } + } else { + nv.levels[level] = baseTabels + } + } + + // Trim levels. + n := len(nv.levels) + for ; n > 0 && nv.levels[n-1] == nil; n-- { + } + nv.levels = nv.levels[:n] + + // Compute compaction score for new version. + nv.computeCompaction() + + return nv +} + +type versionReleaser struct { + v *version + once bool +} + +func (vr *versionReleaser) Release() { + v := vr.v + v.s.vmu.Lock() + if !vr.once { + v.releaseNB() + vr.once = true + } + v.s.vmu.Unlock() +} |