@@ -16,14 +16,14 @@ require ( | |||
gitea.com/macaron/session v0.0.0-20190821211443-122c47c5f705 | |||
gitea.com/macaron/toolbox v0.0.0-20190822013122-05ff0fc766b7 | |||
github.com/PuerkitoBio/goquery v1.5.0 | |||
github.com/RoaringBitmap/roaring v0.4.7 // indirect | |||
github.com/RoaringBitmap/roaring v0.4.21 // indirect | |||
github.com/bgentry/speakeasy v0.1.0 // indirect | |||
github.com/blevesearch/bleve v0.0.0-20190214220507-05d86ea8f6e3 | |||
github.com/blevesearch/bleve v0.8.1 | |||
github.com/blevesearch/blevex v0.0.0-20180227211930-4b158bb555a3 // indirect | |||
github.com/blevesearch/go-porterstemmer v0.0.0-20141230013033-23a2c8e5cf1f // indirect | |||
github.com/blevesearch/segment v0.0.0-20160105220820-db70c57796cc // indirect | |||
github.com/blevesearch/go-porterstemmer v1.0.2 // indirect | |||
github.com/blevesearch/segment v0.0.0-20160915185041-762005e7a34f // indirect | |||
github.com/boombuler/barcode v0.0.0-20161226211916-fe0f26ff6d26 // indirect | |||
github.com/couchbase/vellum v0.0.0-20190111184608-e91b68ff3efe // indirect | |||
github.com/couchbase/vellum v0.0.0-20190829182332-ef2e028c01fd // indirect | |||
github.com/cznic/b v0.0.0-20181122101859-a26611c4d92d // indirect | |||
github.com/cznic/mathutil v0.0.0-20181122101859-297441e03548 // indirect | |||
github.com/cznic/strutil v0.0.0-20181122101858-275e90344537 // indirect | |||
@@ -31,14 +31,13 @@ require ( | |||
github.com/dgrijalva/jwt-go v3.2.0+incompatible | |||
github.com/editorconfig/editorconfig-core-go/v2 v2.1.1 | |||
github.com/emirpasic/gods v1.12.0 | |||
github.com/etcd-io/bbolt v1.3.2 // indirect | |||
github.com/etcd-io/bbolt v1.3.3 // indirect | |||
github.com/ethantkoenig/rupture v0.0.0-20180203182544-0a76f03a811a | |||
github.com/facebookgo/ensure v0.0.0-20160127193407-b4ab57deab51 // indirect | |||
github.com/facebookgo/stack v0.0.0-20160209184415-751773369052 // indirect | |||
github.com/facebookgo/subset v0.0.0-20150612182917-8dac2c3c4870 // indirect | |||
github.com/gliderlabs/ssh v0.2.2 | |||
github.com/glycerine/go-unsnap-stream v0.0.0-20180323001048-9f0cb55181dd // indirect | |||
github.com/glycerine/goconvey v0.0.0-20190315024820-982ee783a72e // indirect | |||
github.com/glycerine/go-unsnap-stream v0.0.0-20190901134440-81cf024a9e0a // indirect | |||
github.com/go-openapi/jsonreference v0.19.3 // indirect | |||
github.com/go-openapi/runtime v0.19.5 // indirect | |||
github.com/go-redis/redis v6.15.2+incompatible | |||
@@ -68,12 +67,10 @@ require ( | |||
github.com/mattn/go-sqlite3 v1.11.0 | |||
github.com/mcuadros/go-version v0.0.0-20190308113854-92cdf37c5b75 | |||
github.com/microcosm-cc/bluemonday v0.0.0-20161012083705-f77f16ffc87a | |||
github.com/mschoch/smat v0.0.0-20160514031455-90eadee771ae // indirect | |||
github.com/msteinert/pam v0.0.0-20151204160544-02ccfbfaf0cc | |||
github.com/nfnt/resize v0.0.0-20160724205520-891127d8d1b5 | |||
github.com/niklasfasching/go-org v0.1.8 | |||
github.com/oliamb/cutter v0.2.2 | |||
github.com/philhofer/fwd v1.0.0 // indirect | |||
github.com/pkg/errors v0.8.1 | |||
github.com/pquerna/otp v0.0.0-20160912161815-54653902c20e | |||
github.com/prometheus/client_golang v1.1.0 | |||
@@ -90,19 +87,17 @@ require ( | |||
github.com/steveyen/gtreap v0.0.0-20150807155958-0abe01ef9be2 // indirect | |||
github.com/stretchr/testify v1.4.0 | |||
github.com/tecbot/gorocksdb v0.0.0-20181010114359-8752a9433481 // indirect | |||
github.com/tinylib/msgp v0.0.0-20180516164116-c8cf64dff200 // indirect | |||
github.com/tstranex/u2f v1.0.0 | |||
github.com/unknwon/cae v0.0.0-20190822084630-55a0b64484a1 | |||
github.com/unknwon/com v0.0.0-20190804042917-757f69c95f3e | |||
github.com/unknwon/i18n v0.0.0-20190805065654-5c6446a380b6 | |||
github.com/unknwon/paginater v0.0.0-20151104151617-7748a72e0141 | |||
github.com/urfave/cli v1.20.0 | |||
github.com/willf/bitset v0.0.0-20180426185212-8ce1146b8621 // indirect | |||
github.com/yohcop/openid-go v0.0.0-20160914080427-2c050d2dae53 | |||
golang.org/x/crypto v0.0.0-20191117063200-497ca9f6d64f | |||
golang.org/x/net v0.0.0-20191101175033-0deb6923b6d9 | |||
golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45 | |||
golang.org/x/sys v0.0.0-20191010194322-b09406accb47 | |||
golang.org/x/sys v0.0.0-20191127021746-63cb32ae39b2 | |||
golang.org/x/text v0.3.2 | |||
golang.org/x/tools v0.0.0-20190910221609-7f5965fd7709 // indirect | |||
gopkg.in/alexcesaro/quotedprintable.v3 v3.0.0-20150716171945-2caba252f4dc // indirect |
@@ -46,8 +46,8 @@ github.com/PuerkitoBio/purell v1.1.1 h1:WEQqlqaGbrPkxLJWfBwQmfEAE1Z7ONdDLqrN38tN | |||
github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0= | |||
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 h1:d+Bc7a5rLufV/sSk/8dngufqelfh6jnri85riMAaF/M= | |||
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE= | |||
github.com/RoaringBitmap/roaring v0.4.7 h1:eGUudvFzvF7Kxh7JjYvXfI1f7l22/2duFby7r5+d4oc= | |||
github.com/RoaringBitmap/roaring v0.4.7/go.mod h1:8khRDP4HmeXns4xIj9oGrKSz7XTQiJx2zgh7AcNke4w= | |||
github.com/RoaringBitmap/roaring v0.4.21 h1:WJ/zIlNX4wQZ9x8Ey33O1UaD9TCTakYsdLFSBcTwH+8= | |||
github.com/RoaringBitmap/roaring v0.4.21/go.mod h1:D0gp8kJQgE1A4LQ5wFLggQEyvDi06Mq5mKs52e1TwOo= | |||
github.com/Shopify/sarama v1.19.0/go.mod h1:FVkBWblsNy7DGZRfXLU0O9RCGt5g3g3yEuWXgklEdEo= | |||
github.com/Shopify/toxiproxy v2.1.4+incompatible/go.mod h1:OXgGpZ6Cli1/URJOF1DMxUHB2q5Ap20/P/eIdh4G0pI= | |||
github.com/Unknwon/com v0.0.0-20190321035513-0fed4efef755/go.mod h1:voKvFVpXBJxdIPeqjoJuLK+UVcRlo/JLjeToGxPYu68= | |||
@@ -72,14 +72,14 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= | |||
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= | |||
github.com/bgentry/speakeasy v0.1.0 h1:ByYyxL9InA1OWqxJqqp2A5pYHUrCiAL6K3J+LKSsQkY= | |||
github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= | |||
github.com/blevesearch/bleve v0.0.0-20190214220507-05d86ea8f6e3 h1:vinCy/rcjbtxWnMiw11CbMKcuyNi+y4L4MbZUpk7m4M= | |||
github.com/blevesearch/bleve v0.0.0-20190214220507-05d86ea8f6e3/go.mod h1:Y2lmIkzV6mcNfAnAdOd+ZxHkHchhBfU/xroGIp61wfw= | |||
github.com/blevesearch/bleve v0.8.1 h1:20zBREtGe8dvBxCC+717SaxKcUVQOWk3/Fm75vabKpU= | |||
github.com/blevesearch/bleve v0.8.1/go.mod h1:Y2lmIkzV6mcNfAnAdOd+ZxHkHchhBfU/xroGIp61wfw= | |||
github.com/blevesearch/blevex v0.0.0-20180227211930-4b158bb555a3 h1:U6vnxZrTfItfiUiYx0lf/LgHjRSfaKK5QHSom3lEbnA= | |||
github.com/blevesearch/blevex v0.0.0-20180227211930-4b158bb555a3/go.mod h1:WH+MU2F4T0VmSdaPX+Wu5GYoZBrYWdOZWSjzvYcDmqQ= | |||
github.com/blevesearch/go-porterstemmer v0.0.0-20141230013033-23a2c8e5cf1f h1:J9ZVHbB2X6JNxbKw/f3Y4E9Xq+Ro+zPiivzgmi3RTvg= | |||
github.com/blevesearch/go-porterstemmer v0.0.0-20141230013033-23a2c8e5cf1f/go.mod h1:haWQqFT3RdOGz7PJuM3or/pWNJS1pKkoZJWCkWu0DVA= | |||
github.com/blevesearch/segment v0.0.0-20160105220820-db70c57796cc h1:7OfDAkuAGx71ruzOIFqCkHqGIsVZU0C7PMw5u1bIrwU= | |||
github.com/blevesearch/segment v0.0.0-20160105220820-db70c57796cc/go.mod h1:IInt5XRvpiGE09KOk9mmCMLjHhydIhNPKPPFLFBB7L8= | |||
github.com/blevesearch/go-porterstemmer v1.0.2 h1:qe7n69gBd1OLY5sHKnxQHIbzn0LNJA4hpAf+5XDxV2I= | |||
github.com/blevesearch/go-porterstemmer v1.0.2/go.mod h1:haWQqFT3RdOGz7PJuM3or/pWNJS1pKkoZJWCkWu0DVA= | |||
github.com/blevesearch/segment v0.0.0-20160915185041-762005e7a34f h1:kqbi9lqXLLs+zfWlgo1PIiRQ86n33K1JKotjj4rSYOg= | |||
github.com/blevesearch/segment v0.0.0-20160915185041-762005e7a34f/go.mod h1:IInt5XRvpiGE09KOk9mmCMLjHhydIhNPKPPFLFBB7L8= | |||
github.com/boombuler/barcode v0.0.0-20161226211916-fe0f26ff6d26 h1:NGpwhs9FOwddM6TptNrq2ycby4s24TcppSe5uG4DA/Q= | |||
github.com/boombuler/barcode v0.0.0-20161226211916-fe0f26ff6d26/go.mod h1:paBWMcWSl3LHKBqUq+rly7CNSldXjb2rDl3JlRe0mD8= | |||
github.com/bradfitz/gomemcache v0.0.0-20190329173943-551aad21a668 h1:U/lr3Dgy4WK+hNk4tyD+nuGjpVLPEHuJSFXMw11/HPA= | |||
@@ -92,6 +92,7 @@ github.com/coreos/bbolt v1.3.2/go.mod h1:iRUV2dpdMOn7Bo10OQBFzIJO9kkE559Wcmn+qkE | |||
github.com/coreos/bbolt v1.3.3/go.mod h1:iRUV2dpdMOn7Bo10OQBFzIJO9kkE559Wcmn+qkEiiKk= | |||
github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= | |||
github.com/coreos/etcd v3.3.15+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= | |||
github.com/coreos/go-etcd v2.0.0+incompatible/go.mod h1:Jez6KQU2B/sWsbdaef3ED8NzMklzPG4d5KIOhIy30Tk= | |||
github.com/coreos/go-oidc v2.1.0+incompatible/go.mod h1:CgnwVTmzoESiwO9qyAFEMiHoZ1nMCKZlZ9V6mm3/LKc= | |||
github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= | |||
github.com/coreos/go-semver v0.3.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= | |||
@@ -102,10 +103,11 @@ github.com/couchbase/gomemcached v0.0.0-20190515232915-c4b4ca0eb21d h1:XMf4E1U+b | |||
github.com/couchbase/gomemcached v0.0.0-20190515232915-c4b4ca0eb21d/go.mod h1:srVSlQLB8iXBVXHgnqemxUXqN6FCvClgCMPCsjBDR7c= | |||
github.com/couchbase/goutils v0.0.0-20190315194238-f9d42b11473b h1:bZ9rKU2/V8sY+NulSfxDOnXTWcs1rySqdF1sVepihvo= | |||
github.com/couchbase/goutils v0.0.0-20190315194238-f9d42b11473b/go.mod h1:BQwMFlJzDjFDG3DJUdU0KORxn88UlsOULuxLExMh3Hs= | |||
github.com/couchbase/vellum v0.0.0-20190111184608-e91b68ff3efe h1:2o6Y7KMjJNsuMTF8f2H2eTKRhqH7+bQbjr+D+LnhE5M= | |||
github.com/couchbase/vellum v0.0.0-20190111184608-e91b68ff3efe/go.mod h1:prYTC8EgTu3gwbqJihkud9zRXISvyulAplQ6exdCo1g= | |||
github.com/couchbase/vellum v0.0.0-20190829182332-ef2e028c01fd h1:zeuJhcG3f8eePshH3KxkNE+Xtl53pVln9MOUPMyr/1w= | |||
github.com/couchbase/vellum v0.0.0-20190829182332-ef2e028c01fd/go.mod h1:xbc8Ff/oG7h2ejd7AlwOpfd+6QZntc92ygpAOfGwcKY= | |||
github.com/couchbaselabs/go-couchbase v0.0.0-20190708161019-23e7ca2ce2b7 h1:1XjEY/gnjQ+AfXef2U6dxCquhiRzkEpxZuWqs+QxTL8= | |||
github.com/couchbaselabs/go-couchbase v0.0.0-20190708161019-23e7ca2ce2b7/go.mod h1:mby/05p8HE5yHEAKiIH/555NoblMs7PtW6NrYshDruc= | |||
github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE= | |||
github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7DoTY= | |||
github.com/cupcake/rdb v0.0.0-20161107195141-43ba34106c76/go.mod h1:vYwsqCOLxGiisLwp9rITslkFNpZD5rz43tf41QFkTWY= | |||
github.com/cznic/b v0.0.0-20181122101859-a26611c4d92d h1:SwD98825d6bdB+pEuTxWOXiSjBrHdOl/UVp75eI7JT8= | |||
@@ -135,8 +137,8 @@ github.com/edsrzf/mmap-go v1.0.0 h1:CEBF7HpRnUCSJgGUb5h1Gm7e3VkmVDrR8lvWVLtrOFw= | |||
github.com/edsrzf/mmap-go v1.0.0/go.mod h1:YO35OhQPt3KJa3ryjFM5Bs14WD66h8eGKpfaBNrHW5M= | |||
github.com/emirpasic/gods v1.12.0 h1:QAUIPSaCu4G+POclxeqb3F+WPpdKqFGlw36+yOzGlrg= | |||
github.com/emirpasic/gods v1.12.0/go.mod h1:YfzfFFoVP/catgzJb4IKIqXjX78Ha8FMSDh3ymbK86o= | |||
github.com/etcd-io/bbolt v1.3.2 h1:RLRQ0TKLX7DlBRXAJHvbmXL17Q3KNnTBtZ9B6Qo+/Y0= | |||
github.com/etcd-io/bbolt v1.3.2/go.mod h1:ZF2nL25h33cCyBtcyWeZ2/I3HQOfTP+0PIEvHjkjCrw= | |||
github.com/etcd-io/bbolt v1.3.3 h1:gSJmxrs37LgTqR/oyJBWok6k6SvXEUerFTbltIhXkBM= | |||
github.com/etcd-io/bbolt v1.3.3/go.mod h1:ZF2nL25h33cCyBtcyWeZ2/I3HQOfTP+0PIEvHjkjCrw= | |||
github.com/ethantkoenig/rupture v0.0.0-20180203182544-0a76f03a811a h1:M1bRpaZAn4GSsqu3hdK2R8H0AH9O6vqCTCbm2oAFGfE= | |||
github.com/ethantkoenig/rupture v0.0.0-20180203182544-0a76f03a811a/go.mod h1:MkKY/CB98aVE4VxO63X5vTQKUgcn+3XP15LMASe3lYs= | |||
github.com/facebookgo/ensure v0.0.0-20160127193407-b4ab57deab51 h1:0JZ+dUmQeA8IIVUMzysrX4/AKuQwWhV2dYQuPZdvdSQ= | |||
@@ -154,10 +156,11 @@ github.com/gliderlabs/ssh v0.2.2 h1:6zsha5zo/TWhRhwqCD3+EarCAgZ2yN28ipRnGPnwkI0= | |||
github.com/gliderlabs/ssh v0.2.2/go.mod h1:U7qILu1NlMHj9FlMhZLlkCdDnU1DBEAqr0aevW3Awn0= | |||
github.com/globalsign/mgo v0.0.0-20180905125535-1ca0a4f7cbcb/go.mod h1:xkRDCp4j0OGD1HRkm4kmhM+pmpv3AKq5SU7GMg4oO/Q= | |||
github.com/globalsign/mgo v0.0.0-20181015135952-eeefdecb41b8/go.mod h1:xkRDCp4j0OGD1HRkm4kmhM+pmpv3AKq5SU7GMg4oO/Q= | |||
github.com/glycerine/go-unsnap-stream v0.0.0-20180323001048-9f0cb55181dd h1:r04MMPyLHj/QwZuMJ5+7tJcBr1AQjpiAK/rZWRrQT7o= | |||
github.com/glycerine/go-unsnap-stream v0.0.0-20180323001048-9f0cb55181dd/go.mod h1:/20jfyN9Y5QPEAprSgKAUr+glWDY39ZiUEAYOEv5dsE= | |||
github.com/glycerine/goconvey v0.0.0-20190315024820-982ee783a72e h1:SiEs4J3BKVIeaWrH3tKaz3QLZhJ68iJ/A4xrzIoE5+Y= | |||
github.com/glycerine/goconvey v0.0.0-20190315024820-982ee783a72e/go.mod h1:Ogl1Tioa0aV7gstGFO7KhffUsb9M4ydbEbbxpcEDc24= | |||
github.com/glycerine/go-unsnap-stream v0.0.0-20181221182339-f9677308dec2/go.mod h1:/20jfyN9Y5QPEAprSgKAUr+glWDY39ZiUEAYOEv5dsE= | |||
github.com/glycerine/go-unsnap-stream v0.0.0-20190901134440-81cf024a9e0a h1:FQqoVvjbiUioBBFUL5up+h+GdCa/AnJsL/1bIs/veSI= | |||
github.com/glycerine/go-unsnap-stream v0.0.0-20190901134440-81cf024a9e0a/go.mod h1:/20jfyN9Y5QPEAprSgKAUr+glWDY39ZiUEAYOEv5dsE= | |||
github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31 h1:gclg6gY70GLy3PbkQ1AERPfmLMMagS60DKF78eWwLn8= | |||
github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31/go.mod h1:Ogl1Tioa0aV7gstGFO7KhffUsb9M4ydbEbbxpcEDc24= | |||
github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= | |||
github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= | |||
github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= | |||
@@ -279,6 +282,8 @@ github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORR | |||
github.com/gopherjs/gopherjs v0.0.0-20181103185306-d547d1d9531e/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= | |||
github.com/gopherjs/gopherjs v0.0.0-20190430165422-3e4dfb77656c h1:7lF+Vz0LqiRidnzC1Oq86fpX1q/iEv2KJdrCtttYjT4= | |||
github.com/gopherjs/gopherjs v0.0.0-20190430165422-3e4dfb77656c/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= | |||
github.com/gopherjs/gopherjs v0.0.0-20190910122728-9d188e94fb99 h1:twflg0XRTjwKpxb/jFExr4HGq6on2dEOmnL6FV+fgPw= | |||
github.com/gopherjs/gopherjs v0.0.0-20190910122728-9d188e94fb99/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= | |||
github.com/gorilla/context v1.1.1 h1:AWwleXJkX/nhcU9bZSnZoi3h/qGYqQAGhq6zZe/aQW8= | |||
github.com/gorilla/context v1.1.1/go.mod h1:kBGZzfjB9CEq2AlWe17Uuf7NDRt0dE0s8S51q0aT7Yg= | |||
github.com/gorilla/handlers v1.4.2 h1:0QniY0USkHQ1RGCLfKxeNHK9bkDHGRYGNDFBCS+YARg= | |||
@@ -304,6 +309,7 @@ github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= | |||
github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= | |||
github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI= | |||
github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= | |||
github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= | |||
github.com/issue9/assert v1.3.2 h1:IaTa37u4m1fUuTH9K9ldO5IONKVDXjLiUO1T9vj0OF0= | |||
github.com/issue9/assert v1.3.2/go.mod h1:9Ger+iz8X7r1zMYYwEhh++2wMGWcNN2oVI+zIQXxcio= | |||
github.com/issue9/identicon v0.0.0-20160320065130-d36b54562f4c h1:A/PDn117UYld5mlxe58EpMguqpkeTMw5/FCo0ZPS/Ko= | |||
@@ -467,6 +473,8 @@ github.com/remyoudompheng/bigfft v0.0.0-20190321074620-2f0d2b0e0001/go.mod h1:qq | |||
github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg= | |||
github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= | |||
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= | |||
github.com/russross/blackfriday v1.5.2 h1:HyvC0ARfnZBqnXwABFeSZHpKvJHJJfPz81GNueLj0oo= | |||
github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g= | |||
github.com/russross/blackfriday/v2 v2.0.1 h1:lPqVAte+HuHNfhJ/0LC98ESWRz8afy9tM/0RK8m9o+Q= | |||
github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= | |||
github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca h1:NugYot0LIVPxTvN8n+Kvkn6TrbMyxQiuvKdEwFdR9vI= | |||
@@ -504,11 +512,13 @@ github.com/spf13/afero v1.2.2 h1:5jhuqJyZCZf2JRofRvN/nIFgIWNzPa3/Vz8mYylgbWc= | |||
github.com/spf13/afero v1.2.2/go.mod h1:9ZxEEn6pIJ8Rxe320qSDBk6AsU0r9pR7Q4OcevTdifk= | |||
github.com/spf13/cast v1.3.0 h1:oget//CVOEoFewqQxwr0Ej5yjygnqGkvggSE/gB35Q8= | |||
github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= | |||
github.com/spf13/cobra v0.0.5/go.mod h1:3K3wKZymM7VvHMDS9+Akkh4K60UwM26emMESw8tLCHU= | |||
github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo= | |||
github.com/spf13/jwalterweatherman v1.1.0 h1:ue6voC5bR5F8YxI5S67j9i582FU4Qvo2bmqnqMYADFk= | |||
github.com/spf13/jwalterweatherman v1.1.0/go.mod h1:aNWZUN0dPAAO/Ljvb5BEdw96iTZ0EXowPYD95IqWIGo= | |||
github.com/spf13/pflag v1.0.3 h1:zPAT6CGy6wXeQ7NtTnaTerfKOsV6V6F8agHXFiazDkg= | |||
github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= | |||
github.com/spf13/viper v1.3.2/go.mod h1:ZiWeW+zYFKm7srdB9IoDzzZXaJaI5eL9QjNiN/DMA2s= | |||
github.com/spf13/viper v1.4.0 h1:yXHLWeravcrgGyFSyCgdYpXQ9dR9c/WED3pg1RhxqEU= | |||
github.com/spf13/viper v1.4.0/go.mod h1:PTJ7Z/lr49W6bUbkmS1V3by4uWynFiR9p7+dSq/yZzE= | |||
github.com/src-d/gcfg v1.4.0 h1:xXbNR5AlLSA315x2UO+fTSSAXCDf+Ar38/6oyGbDKQ4= | |||
@@ -529,8 +539,8 @@ github.com/tecbot/gorocksdb v0.0.0-20181010114359-8752a9433481 h1:HOxvxvnntLiPn1 | |||
github.com/tecbot/gorocksdb v0.0.0-20181010114359-8752a9433481/go.mod h1:ahpPrc7HpcfEWDQRZEmnXMzHY03mLDYMCxeDzy46i+8= | |||
github.com/tidwall/pretty v1.0.0 h1:HsD+QiTn7sK6flMKIvNmpqz1qrpP3Ps6jOKIKMooyg4= | |||
github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk= | |||
github.com/tinylib/msgp v0.0.0-20180516164116-c8cf64dff200 h1:ZVvr38DYEyOPyelySqvF0I9I++85NnUMsWkroBDS4fs= | |||
github.com/tinylib/msgp v0.0.0-20180516164116-c8cf64dff200/go.mod h1:+d+yLhGm8mzTaHzB+wgMYrodPfmZrzkirds8fDWklFE= | |||
github.com/tinylib/msgp v1.1.0 h1:9fQd+ICuRIu/ue4vxJZu6/LzxN0HwMds2nq/0cFvxHU= | |||
github.com/tinylib/msgp v1.1.0/go.mod h1:+d+yLhGm8mzTaHzB+wgMYrodPfmZrzkirds8fDWklFE= | |||
github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= | |||
github.com/toqueteos/webbrowser v1.2.0 h1:tVP/gpK69Fx+qMJKsLE7TD8LuGWPnEV71wBN9rrstGQ= | |||
github.com/toqueteos/webbrowser v1.2.0/go.mod h1:XWoZq4cyp9WeUeak7w7LXRUQf1F1ATJMir8RTqb4ayM= | |||
@@ -538,6 +548,7 @@ github.com/tstranex/u2f v1.0.0 h1:HhJkSzDDlVSVIVt7pDJwCHQj67k7A5EeBgPmeD+pVsQ= | |||
github.com/tstranex/u2f v1.0.0/go.mod h1:eahSLaqAS0zsIEv80+vXT7WanXs7MQQDg3j3wGBSayo= | |||
github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc= | |||
github.com/ugorji/go v1.1.7/go.mod h1:kZn38zHttfInRq0xu/PH0az30d+z6vm202qpg1oXVMw= | |||
github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0= | |||
github.com/ugorji/go/codec v1.1.7/go.mod h1:Ax+UKWsSmolVDwsd+7N3ZtXu+yMGCf907BLYF3GoBXY= | |||
github.com/unknwon/cae v0.0.0-20190822084630-55a0b64484a1 h1:SpoCl3+Pta5/ubQyF+Fmx65obtpfkyzeaOIneCE3MTw= | |||
github.com/unknwon/cae v0.0.0-20190822084630-55a0b64484a1/go.mod h1:QaSeRctcea9fK6piJpAMCCPKxzJ01+xFcr2k1m3WRPU= | |||
@@ -549,8 +560,8 @@ github.com/unknwon/paginater v0.0.0-20151104151617-7748a72e0141 h1:Z79lyIznnziKA | |||
github.com/unknwon/paginater v0.0.0-20151104151617-7748a72e0141/go.mod h1:TBwoao3Q4Eb/cp+dHbXDfRTrZSsj/k7kLr2j1oWRWC0= | |||
github.com/urfave/cli v1.20.0 h1:fDqGv3UG/4jbVl/QkFwEdddtEDjh/5Ov6X+0B/3bPaw= | |||
github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA= | |||
github.com/willf/bitset v0.0.0-20180426185212-8ce1146b8621 h1:E8u341JM/N8LCnPXBV6ZFD1RKo/j+qHl1XOqSV+GstA= | |||
github.com/willf/bitset v0.0.0-20180426185212-8ce1146b8621/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4= | |||
github.com/willf/bitset v1.1.10 h1:NotGKqX0KwQ72NUzqrjZq5ipPNDQex9lo3WpaS8L2sc= | |||
github.com/willf/bitset v1.1.10/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4= | |||
github.com/xanzy/ssh-agent v0.2.1 h1:TCbipTQL2JiiCprBWx9frJ2eJlCYT00NmctrHxVAr70= | |||
github.com/xanzy/ssh-agent v0.2.1/go.mod h1:mLlQY/MoOhWBj+gOGMQkOeiEvkx+8pJSI+0Bx9h2kr4= | |||
github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= | |||
@@ -574,6 +585,7 @@ go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/ | |||
go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= | |||
golang.org/x/crypto v0.0.0-20180820150726-614d502a4dac/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= | |||
golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= | |||
golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= | |||
golang.org/x/crypto v0.0.0-20190219172222-a4c6cb3142f2/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= | |||
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= | |||
golang.org/x/crypto v0.0.0-20190320223903-b7391e95e576/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= | |||
@@ -642,6 +654,7 @@ golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5h | |||
golang.org/x/sys v0.0.0-20181107165924-66b7b1311ac8/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= | |||
golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= | |||
golang.org/x/sys v0.0.0-20181122145206-62eef0e2fa9b/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= | |||
golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= | |||
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= | |||
golang.org/x/sys v0.0.0-20190221075227-b4e8571b14e0/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= | |||
golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= | |||
@@ -660,6 +673,8 @@ golang.org/x/sys v0.0.0-20190801041406-cbf593c0f2f3/go.mod h1:h1NjWce9XRLGQEsW7w | |||
golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= | |||
golang.org/x/sys v0.0.0-20191010194322-b09406accb47 h1:/XfQ9z7ib8eEJX2hdgFTZJ/ntt0swNk5oYBziWeTCvY= | |||
golang.org/x/sys v0.0.0-20191010194322-b09406accb47/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= | |||
golang.org/x/sys v0.0.0-20191127021746-63cb32ae39b2 h1:/J2nHFg1MTqaRLFO7M+J78ASNsJoz3r0cvHBPQ77fsE= | |||
golang.org/x/sys v0.0.0-20191127021746-63cb32ae39b2/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= | |||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= | |||
golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= | |||
golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs= |
@@ -0,0 +1,20 @@ | |||
kind: pipeline | |||
name: default | |||
workspace: | |||
base: /go | |||
path: src/github.com/RoaringBitmap/roaring | |||
steps: | |||
- name: test | |||
image: golang | |||
commands: | |||
- go get -t | |||
- go test | |||
- go test -race -run TestConcurrent* | |||
- go build -tags appengine | |||
- go test -tags appengine | |||
- GOARCH=386 go build | |||
- GOARCH=386 go test | |||
- GOARCH=arm go build | |||
- GOARCH=arm64 go build |
@@ -8,10 +8,12 @@ install: | |||
notifications: | |||
email: false | |||
go: | |||
- 1.7.x | |||
- 1.8.x | |||
- 1.9.x | |||
- 1.10.x | |||
- "1.7.x" | |||
- "1.8.x" | |||
- "1.9.x" | |||
- "1.10.x" | |||
- "1.11.x" | |||
- "1.12.x" | |||
- tip | |||
# whitelist | |||
@@ -21,10 +23,14 @@ branches: | |||
script: | |||
- goveralls -v -service travis-ci -ignore arraycontainer_gen.go,bitmapcontainer_gen.go,rle16_gen.go,rle_gen.go,roaringarray_gen.go,rle.go || go test | |||
- go test -race -run TestConcurrent* | |||
- go build -tags appengine | |||
- go test -tags appengine | |||
- GOARCH=arm64 go build | |||
- GOARCH=386 go build | |||
- GOARCH=386 go test | |||
- GOARCH=arm go build | |||
- GOARCH=arm64 go build | |||
matrix: | |||
allow_failures: | |||
- go: tip |
@@ -7,4 +7,5 @@ Bob Potter (@bpot), | |||
Tyson Maly (@tvmaly), | |||
Will Glynn (@willglynn), | |||
Brent Pedersen (@brentp), | |||
Maciej Biłas (@maciej) | |||
Maciej Biłas (@maciej), | |||
Joe Nall (@joenall) |
@@ -9,4 +9,8 @@ Will Glynn (@willglynn), | |||
Brent Pedersen (@brentp), | |||
Jason E. Aten (@glycerine), | |||
Vali Malinoiu (@0x4139), | |||
Forud Ghafouri (@fzerorubigd) | |||
Forud Ghafouri (@fzerorubigd), | |||
Joe Nall (@joenall), | |||
(@fredim), | |||
Edd Robinson (@e-dard), | |||
Alexander Petrov (@alldroll) |
@@ -200,3 +200,36 @@ | |||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
See the License for the specific language governing permissions and | |||
limitations under the License. | |||
================================================================================ | |||
Portions of runcontainer.go are from the Go standard library, which is licensed | |||
under: | |||
Copyright (c) 2009 The Go Authors. All rights reserved. | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are | |||
met: | |||
* Redistributions of source code must retain the above copyright | |||
notice, this list of conditions and the following disclaimer. | |||
* Redistributions in binary form must reproduce the above | |||
copyright notice, this list of conditions and the following disclaimer | |||
in the documentation and/or other materials provided with the | |||
distribution. | |||
* Neither the name of Google Inc. nor the names of its | |||
contributors may be used to endorse or promote products derived from | |||
this software without specific prior written permission. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
@@ -1,4 +1,4 @@ | |||
.PHONY: help all test format fmtcheck vet lint qa deps clean nuke rle backrle ser fetch-real-roaring-datasets | |||
.PHONY: help all test format fmtcheck vet lint qa deps clean nuke ser fetch-real-roaring-datasets | |||
@@ -63,7 +63,7 @@ qa: fmtcheck test vet lint | |||
# Get the dependencies | |||
deps: | |||
GOPATH=$(GOPATH) go get github.com/smartystreets/goconvey/convey | |||
GOPATH=$(GOPATH) go get github.com/stretchr/testify | |||
GOPATH=$(GOPATH) go get github.com/willf/bitset | |||
GOPATH=$(GOPATH) go get github.com/golang/lint/golint | |||
GOPATH=$(GOPATH) go get github.com/mschoch/smat | |||
@@ -97,18 +97,8 @@ nuke: | |||
rm -rf ./target | |||
GOPATH=$(GOPATH) go clean -i ./... | |||
rle: | |||
cp rle.go rle16.go | |||
perl -pi -e 's/32/16/g' rle16.go | |||
cp rle_test.go rle16_test.go | |||
perl -pi -e 's/32/16/g' rle16_test.go | |||
backrle: | |||
cp rle16.go rle.go | |||
perl -pi -e 's/16/32/g' rle.go | |||
perl -pi -e 's/2032/2016/g' rle.go | |||
ser: rle | |||
ser: | |||
go generate | |||
cover: |
@@ -1,4 +1,5 @@ | |||
roaring [![Build Status](https://travis-ci.org/RoaringBitmap/roaring.png)](https://travis-ci.org/RoaringBitmap/roaring) [![Coverage Status](https://coveralls.io/repos/github/RoaringBitmap/roaring/badge.svg?branch=master)](https://coveralls.io/github/RoaringBitmap/roaring?branch=master) [![GoDoc](https://godoc.org/github.com/RoaringBitmap/roaring?status.svg)](https://godoc.org/github.com/RoaringBitmap/roaring) [![Go Report Card](https://goreportcard.com/badge/RoaringBitmap/roaring)](https://goreportcard.com/report/github.com/RoaringBitmap/roaring) | |||
[![Build Status](https://cloud.drone.io/api/badges/RoaringBitmap/roaring/status.svg)](https://cloud.drone.io/RoaringBitmap/roaring) | |||
============= | |||
This is a Go version of the Roaring bitmap data structure. | |||
@@ -6,12 +7,12 @@ This is a go version of the Roaring bitmap data structure. | |||
Roaring bitmaps are used by several major systems such as [Apache Lucene][lucene] and derivative systems such as [Solr][solr] and | |||
[Elasticsearch][elasticsearch], [Metamarkets' Druid][druid], [LinkedIn Pinot][pinot], [Netflix Atlas][atlas], [Apache Spark][spark], [OpenSearchServer][opensearchserver], [Cloud Torrent][cloudtorrent], [Whoosh][whoosh], [Pilosa][pilosa], [Microsoft Visual Studio Team Services (VSTS)][vsts], and eBay's [Apache Kylin][kylin]. | |||
[Elasticsearch][elasticsearch], [Apache Druid (Incubating)][druid], [LinkedIn Pinot][pinot], [Netflix Atlas][atlas], [Apache Spark][spark], [OpenSearchServer][opensearchserver], [Cloud Torrent][cloudtorrent], [Whoosh][whoosh], [Pilosa][pilosa], [Microsoft Visual Studio Team Services (VSTS)][vsts], and eBay's [Apache Kylin][kylin]. | |||
[lucene]: https://lucene.apache.org/ | |||
[solr]: https://lucene.apache.org/solr/ | |||
[elasticsearch]: https://www.elastic.co/products/elasticsearch | |||
[druid]: http://druid.io/ | |||
[druid]: https://druid.apache.org/ | |||
[spark]: https://spark.apache.org/ | |||
[opensearchserver]: http://www.opensearchserver.com | |||
[cloudtorrent]: https://github.com/jpillora/cloud-torrent | |||
@@ -61,7 +62,6 @@ http://arxiv.org/abs/1402.6407 This paper used data from http://lemire.me/data/r | |||
Dependencies are fetched automatically by giving the `-t` flag to `go get`. | |||
They include: | |||
- github.com/smartystreets/goconvey/convey | |||
- github.com/willf/bitset | |||
- github.com/mschoch/smat | |||
- github.com/glycerine/go-unsnap-stream | |||
@@ -133,6 +133,7 @@ func main() { | |||
if rb1.Equals(newrb) { | |||
fmt.Println("I wrote the content to a byte stream and read it back.") | |||
} | |||
// you can iterate over bitmaps using ReverseIterator(), Iterator(), ManyIterator() | |||
} | |||
``` | |||
@@ -206,7 +207,7 @@ You can use roaring with gore: | |||
- go get -u github.com/motemen/gore | |||
- Make sure that ``$GOPATH/bin`` is in your ``$PATH``. | |||
- go get github/RoaringBitmap/roaring | |||
- go get github.com/RoaringBitmap/roaring | |||
```go | |||
$ gore |
@@ -24,12 +24,16 @@ func (ac *arrayContainer) fillLeastSignificant16bits(x []uint32, i int, mask uin | |||
} | |||
} | |||
// getShortIterator returns a pointer to a shortIterator built from ac.content
// with its second field set to 0 (presumably the starting cursor position —
// TODO confirm against the shortIterator definition).
// NOTE(review): the two signature lines below look like the before/after sides
// of a diff (return type shortIterable vs. shortPeekable); only one can be live.
func (ac *arrayContainer) getShortIterator() shortIterable { | |||
func (ac *arrayContainer) getShortIterator() shortPeekable { | |||
return &shortIterator{ac.content, 0} | |||
} | |||
// getReverseIterator returns a pointer to a reverseIterator built from
// ac.content with its second field set to len(ac.content) - 1, i.e. the index
// of the last element (-1 when the content is empty).
func (ac *arrayContainer) getReverseIterator() shortIterable { | |||
return &reverseIterator{ac.content, len(ac.content) - 1} | |||
} | |||
// getManyIterator returns a manyIterable built from ac.content with its second
// field set to 0 (presumably the starting cursor position — TODO confirm).
// NOTE(review): the two return lines below look like the before/after sides of
// a diff (manyIterator vs. shortIterator); as written only the first is
// reachable.
func (ac *arrayContainer) getManyIterator() manyIterable { | |||
return &manyIterator{ac.content, 0} | |||
return &shortIterator{ac.content, 0} | |||
} | |||
func (ac *arrayContainer) minimum() uint16 { | |||
@@ -115,7 +119,6 @@ func (ac *arrayContainer) iremoveRange(firstOfRange, endx int) container { | |||
// not flips the values in the half-open range [firstOfRange,endx) and returns
// the resulting container. When the range is empty (firstOfRange >= endx) it
// returns ac.clone(); otherwise it delegates to notClose with the equivalent
// inclusive range [firstOfRange,endx-1].
func (ac *arrayContainer) not(firstOfRange, endx int) container { | |||
if firstOfRange >= endx { | |||
//p("arrayContainer.not(): exiting early with ac.clone()") | |||
return ac.clone() | |||
} | |||
return ac.notClose(firstOfRange, endx-1) // flip everything in [firstOfRange,endx-1] | |||
} | |||
@@ -124,18 +127,15 @@ func (ac *arrayContainer) not(firstOfRange, endx int) container { | |||
// flip the values in the range [firstOfRange,lastOfRange] | |||
func (ac *arrayContainer) notClose(firstOfRange, lastOfRange int) container { | |||
if firstOfRange > lastOfRange { // unlike add and remove, not uses an inclusive range [firstOfRange,lastOfRange] | |||
//p("arrayContainer.notClose(): exiting early with ac.clone()") | |||
return ac.clone() | |||
} | |||
// determine the span of array indices to be affected^M | |||
startIndex := binarySearch(ac.content, uint16(firstOfRange)) | |||
//p("startIndex=%v", startIndex) | |||
if startIndex < 0 { | |||
startIndex = -startIndex - 1 | |||
} | |||
lastIndex := binarySearch(ac.content, uint16(lastOfRange)) | |||
//p("lastIndex=%v", lastIndex) | |||
if lastIndex < 0 { | |||
lastIndex = -lastIndex - 2 | |||
} | |||
@@ -144,9 +144,7 @@ func (ac *arrayContainer) notClose(firstOfRange, lastOfRange int) container { | |||
newValuesInRange := spanToBeFlipped - currentValuesInRange | |||
cardinalityChange := newValuesInRange - currentValuesInRange | |||
newCardinality := len(ac.content) + cardinalityChange | |||
//p("new card is %v", newCardinality) | |||
if newCardinality > arrayDefaultMaxSize { | |||
//p("new card over arrayDefaultMaxSize, so returning bitmap") | |||
return ac.toBitmapContainer().not(firstOfRange, lastOfRange+1) | |||
} | |||
answer := newArrayContainer() | |||
@@ -503,7 +501,6 @@ func (ac *arrayContainer) lazyorArray(value2 *arrayContainer) container { | |||
} | |||
func (ac *arrayContainer) and(a container) container { | |||
//p("ac.and() called") | |||
switch x := a.(type) { | |||
case *arrayContainer: | |||
return ac.andArray(x) | |||
@@ -550,7 +547,7 @@ func (ac *arrayContainer) iand(a container) container { | |||
return ac.iandBitmap(x) | |||
case *runContainer16: | |||
if x.isFull() { | |||
return ac.clone() | |||
return ac | |||
} | |||
return x.andArray(ac) | |||
} | |||
@@ -722,7 +719,6 @@ func (ac *arrayContainer) inot(firstOfRange, endx int) container { | |||
// flip the values in the range [firstOfRange,lastOfRange] | |||
func (ac *arrayContainer) inotClose(firstOfRange, lastOfRange int) container { | |||
//p("ac.inotClose() starting") | |||
if firstOfRange > lastOfRange { // unlike add and remove, not uses an inclusive range [firstOfRange,lastOfRange] | |||
return ac | |||
} | |||
@@ -745,7 +741,6 @@ func (ac *arrayContainer) inotClose(firstOfRange, lastOfRange int) container { | |||
if cardinalityChange > 0 { | |||
if newCardinality > len(ac.content) { | |||
if newCardinality > arrayDefaultMaxSize { | |||
//p("ac.inotClose() converting to bitmap and doing inot there") | |||
bcRet := ac.toBitmapContainer() | |||
bcRet.inot(firstOfRange, lastOfRange+1) | |||
*ac = *bcRet.toArrayContainer() | |||
@@ -766,7 +761,6 @@ func (ac *arrayContainer) inotClose(firstOfRange, lastOfRange int) container { | |||
} | |||
} | |||
ac.content = ac.content[:newCardinality] | |||
//p("bottom of ac.inotClose(): returning ac") | |||
return ac | |||
} | |||
@@ -958,3 +952,17 @@ func (ac *arrayContainer) toEfficientContainer() container { | |||
// containerType identifies this container as an array container by
// returning arrayContype.
func (ac *arrayContainer) containerType() contype { | |||
return arrayContype | |||
} | |||
func (ac *arrayContainer) addOffset(x uint16) []container { | |||
low := &arrayContainer{} | |||
high := &arrayContainer{} | |||
for _, val := range ac.content { | |||
y := uint32(val) + uint32(x) | |||
if highbits(y) > 0 { | |||
high.content = append(high.content, lowbits(y)) | |||
} else { | |||
low.content = append(low.content, lowbits(y)) | |||
} | |||
} | |||
return []container{low, high} | |||
} |
@@ -6,7 +6,7 @@ package roaring | |||
import "github.com/tinylib/msgp/msgp" | |||
// DecodeMsg implements msgp.Decodable | |||
// Deprecated: DecodeMsg implements msgp.Decodable | |||
func (z *arrayContainer) DecodeMsg(dc *msgp.Reader) (err error) { | |||
var field []byte | |||
_ = field | |||
@@ -49,7 +49,7 @@ func (z *arrayContainer) DecodeMsg(dc *msgp.Reader) (err error) { | |||
return | |||
} | |||
// EncodeMsg implements msgp.Encodable | |||
// Deprecated: EncodeMsg implements msgp.Encodable | |||
func (z *arrayContainer) EncodeMsg(en *msgp.Writer) (err error) { | |||
// map header, size 1 | |||
// write "content" | |||
@@ -70,7 +70,7 @@ func (z *arrayContainer) EncodeMsg(en *msgp.Writer) (err error) { | |||
return | |||
} | |||
// MarshalMsg implements msgp.Marshaler | |||
// Deprecated: MarshalMsg implements msgp.Marshaler | |||
func (z *arrayContainer) MarshalMsg(b []byte) (o []byte, err error) { | |||
o = msgp.Require(b, z.Msgsize()) | |||
// map header, size 1 | |||
@@ -83,7 +83,7 @@ func (z *arrayContainer) MarshalMsg(b []byte) (o []byte, err error) { | |||
return | |||
} | |||
// UnmarshalMsg implements msgp.Unmarshaler | |||
// Deprecated: UnmarshalMsg implements msgp.Unmarshaler | |||
func (z *arrayContainer) UnmarshalMsg(bts []byte) (o []byte, err error) { | |||
var field []byte | |||
_ = field | |||
@@ -127,7 +127,7 @@ func (z *arrayContainer) UnmarshalMsg(bts []byte) (o []byte, err error) { | |||
return | |||
} | |||
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message | |||
// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message | |||
func (z *arrayContainer) Msgsize() (s int) { | |||
s = 1 + 8 + msgp.ArrayHeaderSize + (len(z.content) * (msgp.Uint16Size)) | |||
return |
@@ -110,14 +110,54 @@ func (bcsi *bitmapContainerShortIterator) hasNext() bool { | |||
return bcsi.i >= 0 | |||
} | |||
func (bcsi *bitmapContainerShortIterator) peekNext() uint16 { | |||
return uint16(bcsi.i) | |||
} | |||
func (bcsi *bitmapContainerShortIterator) advanceIfNeeded(minval uint16) { | |||
if bcsi.hasNext() && bcsi.peekNext() < minval { | |||
bcsi.i = bcsi.ptr.NextSetBit(int(minval)) | |||
} | |||
} | |||
func newBitmapContainerShortIterator(a *bitmapContainer) *bitmapContainerShortIterator { | |||
return &bitmapContainerShortIterator{a, a.NextSetBit(0)} | |||
} | |||
func (bc *bitmapContainer) getShortIterator() shortIterable { | |||
func (bc *bitmapContainer) getShortIterator() shortPeekable { | |||
return newBitmapContainerShortIterator(bc) | |||
} | |||
type reverseBitmapContainerShortIterator struct { | |||
ptr *bitmapContainer | |||
i int | |||
} | |||
func (bcsi *reverseBitmapContainerShortIterator) next() uint16 { | |||
if bcsi.i == -1 { | |||
panic("reverseBitmapContainerShortIterator.next() going beyond what is available") | |||
} | |||
j := bcsi.i | |||
bcsi.i = bcsi.ptr.PrevSetBit(bcsi.i - 1) | |||
return uint16(j) | |||
} | |||
func (bcsi *reverseBitmapContainerShortIterator) hasNext() bool { | |||
return bcsi.i >= 0 | |||
} | |||
func newReverseBitmapContainerShortIterator(a *bitmapContainer) *reverseBitmapContainerShortIterator { | |||
if a.cardinality == 0 { | |||
return &reverseBitmapContainerShortIterator{a, -1} | |||
} | |||
return &reverseBitmapContainerShortIterator{a, int(a.maximum())} | |||
} | |||
func (bc *bitmapContainer) getReverseIterator() shortIterable { | |||
return newReverseBitmapContainerShortIterator(bc) | |||
} | |||
type bitmapContainerManyIterator struct { | |||
ptr *bitmapContainer | |||
base int | |||
@@ -131,7 +171,7 @@ func (bcmi *bitmapContainerManyIterator) nextMany(hs uint32, buf []uint32) int { | |||
for n < len(buf) { | |||
if bitset == 0 { | |||
base += 1 | |||
base++ | |||
if base >= len(bcmi.ptr.bitmap) { | |||
bcmi.base = base | |||
bcmi.bitset = bitset | |||
@@ -177,16 +217,13 @@ func bitmapContainerSizeInBytes() int { | |||
// bitmapEquals reports whether a and b have the same length and hold the
// same word at every index.
func bitmapEquals(a, b []uint64) bool {
	n := len(a)
	if n != len(b) {
		return false
	}
	for i := 0; i < n; i++ {
		if a[i] != b[i] {
			return false
		}
	}
	return true
}
@@ -209,9 +246,7 @@ func (bc *bitmapContainer) fillLeastSignificant16bits(x []uint32, i int, mask ui | |||
func (bc *bitmapContainer) equals(o container) bool { | |||
srb, ok := o.(*bitmapContainer) | |||
if ok { | |||
//p("bitmapContainers.equals: both are bitmapContainers") | |||
if srb.cardinality != bc.cardinality { | |||
//p("bitmapContainers.equals: card differs: %v vs %v", srb.cardinality, bc.cardinality) | |||
return false | |||
} | |||
return bitmapEquals(bc.bitmap, srb.bitmap) | |||
@@ -261,12 +296,6 @@ func (bc *bitmapContainer) iremoveReturnMinimized(i uint16) container { | |||
// iremove returns true if i was found. | |||
func (bc *bitmapContainer) iremove(i uint16) bool { | |||
/* branchless code | |||
w := bc.bitmap[i>>6] | |||
mask := uint64(1) << (i % 64) | |||
neww := w &^ mask | |||
bc.cardinality -= int((w ^ neww) >> (i % 64)) | |||
bc.bitmap[i>>6] = neww */ | |||
if bc.contains(i) { | |||
bc.cardinality-- | |||
bc.bitmap[i/64] &^= (uint64(1) << (i % 64)) | |||
@@ -306,14 +335,10 @@ func (bc *bitmapContainer) iremoveRange(firstOfRange, lastOfRange int) container | |||
// flip all values in range [firstOfRange,endx) | |||
func (bc *bitmapContainer) inot(firstOfRange, endx int) container { | |||
p("bc.inot() called with [%v, %v)", firstOfRange, endx) | |||
if endx-firstOfRange == maxCapacity { | |||
//p("endx-firstOfRange == maxCapacity") | |||
flipBitmapRange(bc.bitmap, firstOfRange, endx) | |||
bc.cardinality = maxCapacity - bc.cardinality | |||
//p("bc.cardinality is now %v", bc.cardinality) | |||
} else if endx-firstOfRange > maxCapacity/2 { | |||
//p("endx-firstOfRange > maxCapacity/2") | |||
flipBitmapRange(bc.bitmap, firstOfRange, endx) | |||
bc.computeCardinality() | |||
} else { | |||
@@ -517,11 +542,31 @@ func (bc *bitmapContainer) iorBitmap(value2 *bitmapContainer) container { | |||
func (bc *bitmapContainer) lazyIORArray(value2 *arrayContainer) container { | |||
answer := bc | |||
c := value2.getCardinality() | |||
for k := 0; k < c; k++ { | |||
for k := 0; k+3 < c; k += 4 { | |||
content := (*[4]uint16)(unsafe.Pointer(&value2.content[k])) | |||
vc0 := content[0] | |||
i0 := uint(vc0) >> 6 | |||
answer.bitmap[i0] = answer.bitmap[i0] | (uint64(1) << (vc0 % 64)) | |||
vc1 := content[1] | |||
i1 := uint(vc1) >> 6 | |||
answer.bitmap[i1] = answer.bitmap[i1] | (uint64(1) << (vc1 % 64)) | |||
vc2 := content[2] | |||
i2 := uint(vc2) >> 6 | |||
answer.bitmap[i2] = answer.bitmap[i2] | (uint64(1) << (vc2 % 64)) | |||
vc3 := content[3] | |||
i3 := uint(vc3) >> 6 | |||
answer.bitmap[i3] = answer.bitmap[i3] | (uint64(1) << (vc3 % 64)) | |||
} | |||
for k := c &^ 3; k < c; k++ { | |||
vc := value2.content[k] | |||
i := uint(vc) >> 6 | |||
answer.bitmap[i] = answer.bitmap[i] | (uint64(1) << (vc % 64)) | |||
} | |||
answer.cardinality = invalidCardinality | |||
return answer | |||
} | |||
@@ -789,8 +834,6 @@ func (bc *bitmapContainer) andNotRun16(rc *runContainer16) container { | |||
} | |||
func (bc *bitmapContainer) iandNot(a container) container { | |||
//p("bitmapContainer.iandNot() starting") | |||
switch x := a.(type) { | |||
case *arrayContainer: | |||
return bc.iandNotArray(x) | |||
@@ -844,12 +887,15 @@ func (bc *bitmapContainer) andNotBitmap(value2 *bitmapContainer) container { | |||
return ac | |||
} | |||
func (bc *bitmapContainer) iandNotBitmapSurely(value2 *bitmapContainer) *bitmapContainer { | |||
func (bc *bitmapContainer) iandNotBitmapSurely(value2 *bitmapContainer) container { | |||
newCardinality := int(popcntMaskSlice(bc.bitmap, value2.bitmap)) | |||
for k := 0; k < len(bc.bitmap); k++ { | |||
bc.bitmap[k] = bc.bitmap[k] &^ value2.bitmap[k] | |||
} | |||
bc.cardinality = newCardinality | |||
if bc.getCardinality() <= arrayDefaultMaxSize { | |||
return bc.toArrayContainer() | |||
} | |||
return bc | |||
} | |||
@@ -917,6 +963,32 @@ func (bc *bitmapContainer) NextSetBit(i int) int { | |||
return -1 | |||
} | |||
func (bc *bitmapContainer) PrevSetBit(i int) int { | |||
if i < 0 { | |||
return -1 | |||
} | |||
x := i / 64 | |||
if x >= len(bc.bitmap) { | |||
return -1 | |||
} | |||
w := bc.bitmap[x] | |||
b := i % 64 | |||
w = w << uint(63-b) | |||
if w != 0 { | |||
return i - countLeadingZeros(w) | |||
} | |||
x-- | |||
for ; x >= 0; x-- { | |||
if bc.bitmap[x] != 0 { | |||
return (x * 64) + 63 - countLeadingZeros(bc.bitmap[x]) | |||
} | |||
} | |||
return -1 | |||
} | |||
// reference the java implementation | |||
// https://github.com/RoaringBitmap/RoaringBitmap/blob/master/src/main/java/org/roaringbitmap/BitmapContainer.java#L875-L892 | |||
// | |||
@@ -980,3 +1052,35 @@ func newBitmapContainerFromRun(rc *runContainer16) *bitmapContainer { | |||
// containerType identifies this container as a bitmap container by
// returning bitmapContype.
func (bc *bitmapContainer) containerType() contype { | |||
return bitmapContype | |||
} | |||
func (bc *bitmapContainer) addOffset(x uint16) []container { | |||
low := newBitmapContainer() | |||
high := newBitmapContainer() | |||
b := uint32(x) >> 6 | |||
i := uint32(x) % 64 | |||
end := uint32(1024) - b | |||
if i == 0 { | |||
copy(low.bitmap[b:], bc.bitmap[:end]) | |||
copy(high.bitmap[:b], bc.bitmap[end:]) | |||
} else { | |||
low.bitmap[b] = bc.bitmap[0] << i | |||
for k := uint32(1); k < end; k++ { | |||
newval := bc.bitmap[k] << i | |||
if newval == 0 { | |||
newval = bc.bitmap[k-1] >> (64 - i) | |||
} | |||
low.bitmap[b+k] = newval | |||
} | |||
for k := end; k < 1024; k++ { | |||
newval := bc.bitmap[k] << i | |||
if newval == 0 { | |||
newval = bc.bitmap[k-1] >> (64 - i) | |||
} | |||
high.bitmap[k-end] = newval | |||
} | |||
high.bitmap[b] = bc.bitmap[1023] >> (64 - i) | |||
} | |||
low.computeCardinality() | |||
high.computeCardinality() | |||
return []container{low, high} | |||
} |
@@ -6,7 +6,7 @@ package roaring | |||
import "github.com/tinylib/msgp/msgp" | |||
// DecodeMsg implements msgp.Decodable | |||
// Deprecated: DecodeMsg implements msgp.Decodable | |||
func (z *bitmapContainer) DecodeMsg(dc *msgp.Reader) (err error) { | |||
var field []byte | |||
_ = field | |||
@@ -54,7 +54,7 @@ func (z *bitmapContainer) DecodeMsg(dc *msgp.Reader) (err error) { | |||
return | |||
} | |||
// EncodeMsg implements msgp.Encodable | |||
// Deprecated: EncodeMsg implements msgp.Encodable | |||
func (z *bitmapContainer) EncodeMsg(en *msgp.Writer) (err error) { | |||
// map header, size 2 | |||
// write "cardinality" | |||
@@ -84,7 +84,7 @@ func (z *bitmapContainer) EncodeMsg(en *msgp.Writer) (err error) { | |||
return | |||
} | |||
// MarshalMsg implements msgp.Marshaler | |||
// Deprecated: MarshalMsg implements msgp.Marshaler | |||
func (z *bitmapContainer) MarshalMsg(b []byte) (o []byte, err error) { | |||
o = msgp.Require(b, z.Msgsize()) | |||
// map header, size 2 | |||
@@ -100,7 +100,7 @@ func (z *bitmapContainer) MarshalMsg(b []byte) (o []byte, err error) { | |||
return | |||
} | |||
// UnmarshalMsg implements msgp.Unmarshaler | |||
// Deprecated: UnmarshalMsg implements msgp.Unmarshaler | |||
func (z *bitmapContainer) UnmarshalMsg(bts []byte) (o []byte, err error) { | |||
var field []byte | |||
_ = field | |||
@@ -149,13 +149,13 @@ func (z *bitmapContainer) UnmarshalMsg(bts []byte) (o []byte, err error) { | |||
return | |||
} | |||
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message | |||
// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message | |||
func (z *bitmapContainer) Msgsize() (s int) { | |||
s = 1 + 12 + msgp.IntSize + 7 + msgp.ArrayHeaderSize + (len(z.bitmap) * (msgp.Uint64Size)) | |||
return | |||
} | |||
// DecodeMsg implements msgp.Decodable | |||
// Deprecated: DecodeMsg implements msgp.Decodable | |||
func (z *bitmapContainerShortIterator) DecodeMsg(dc *msgp.Reader) (err error) { | |||
var field []byte | |||
_ = field | |||
@@ -239,7 +239,7 @@ func (z *bitmapContainerShortIterator) DecodeMsg(dc *msgp.Reader) (err error) { | |||
return | |||
} | |||
// EncodeMsg implements msgp.Encodable | |||
// Deprecated: EncodeMsg implements msgp.Encodable | |||
func (z *bitmapContainerShortIterator) EncodeMsg(en *msgp.Writer) (err error) { | |||
// map header, size 2 | |||
// write "ptr" | |||
@@ -291,7 +291,7 @@ func (z *bitmapContainerShortIterator) EncodeMsg(en *msgp.Writer) (err error) { | |||
return | |||
} | |||
// MarshalMsg implements msgp.Marshaler | |||
// Deprecated: MarshalMsg implements msgp.Marshaler | |||
func (z *bitmapContainerShortIterator) MarshalMsg(b []byte) (o []byte, err error) { | |||
o = msgp.Require(b, z.Msgsize()) | |||
// map header, size 2 | |||
@@ -317,7 +317,7 @@ func (z *bitmapContainerShortIterator) MarshalMsg(b []byte) (o []byte, err error | |||
return | |||
} | |||
// UnmarshalMsg implements msgp.Unmarshaler | |||
// Deprecated: UnmarshalMsg implements msgp.Unmarshaler | |||
func (z *bitmapContainerShortIterator) UnmarshalMsg(bts []byte) (o []byte, err error) { | |||
var field []byte | |||
_ = field | |||
@@ -402,7 +402,7 @@ func (z *bitmapContainerShortIterator) UnmarshalMsg(bts []byte) (o []byte, err e | |||
return | |||
} | |||
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message | |||
// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message | |||
func (z *bitmapContainerShortIterator) Msgsize() (s int) { | |||
s = 1 + 4 | |||
if z.ptr == nil { |
@@ -0,0 +1,161 @@ | |||
package roaring | |||
import ( | |||
"encoding/binary" | |||
"io" | |||
) | |||
// byteInput is the sequential byte source used for reading serialized data.
// This file provides two implementations: byteBuffer, backed by a byte
// slice, and byteInputAdapter, backed by an io.Reader.
type byteInput interface { | |||
// next returns a slice containing the next n bytes of the input and
// advances past them, as if they had been returned by Read.
next(n int) ([]byte, error) | |||
// readUInt32 reads the next 4 bytes as a little-endian uint32.
readUInt32() (uint32, error) | |||
// readUInt16 reads the next 2 bytes as a little-endian uint16.
readUInt16() (uint16, error) | |||
// getReadBytes returns the number of bytes consumed so far.
getReadBytes() int64 | |||
// skipBytes skips exactly n bytes, or returns an error.
skipBytes(n int) error | |||
} | |||
func newByteInputFromReader(reader io.Reader) byteInput { | |||
return &byteInputAdapter{ | |||
r: reader, | |||
readBytes: 0, | |||
} | |||
} | |||
func newByteInput(buf []byte) byteInput { | |||
return &byteBuffer{ | |||
buf: buf, | |||
off: 0, | |||
} | |||
} | |||
// byteBuffer is a byteInput that reads from an in-memory byte slice.
type byteBuffer struct {
	buf []byte // data being read
	off int    // index of the next unread byte in buf
}

// next returns a slice containing the next n bytes and advances past them.
// The returned slice aliases the underlying buffer; it is not a copy.
// io.ErrUnexpectedEOF is returned when fewer than n bytes remain. A negative
// n can only come from a corrupt or overflowed length, so it is reported the
// same way instead of panicking on the slice expression.
func (b *byteBuffer) next(n int) ([]byte, error) {
	if n < 0 || n > len(b.buf)-b.off {
		return nil, io.ErrUnexpectedEOF
	}

	data := b.buf[b.off : b.off+n]
	b.off += n

	return data, nil
}

// readUInt32 reads the next 4 bytes as a little-endian uint32, or returns
// io.ErrUnexpectedEOF when fewer than 4 bytes remain.
func (b *byteBuffer) readUInt32() (uint32, error) {
	if len(b.buf)-b.off < 4 {
		return 0, io.ErrUnexpectedEOF
	}

	v := binary.LittleEndian.Uint32(b.buf[b.off:])
	b.off += 4

	return v, nil
}

// readUInt16 reads the next 2 bytes as a little-endian uint16, or returns
// io.ErrUnexpectedEOF when fewer than 2 bytes remain.
func (b *byteBuffer) readUInt16() (uint16, error) {
	if len(b.buf)-b.off < 2 {
		return 0, io.ErrUnexpectedEOF
	}

	v := binary.LittleEndian.Uint16(b.buf[b.off:])
	b.off += 2

	return v, nil
}

// getReadBytes returns the number of bytes consumed so far, i.e. the current
// offset into the buffer.
func (b *byteBuffer) getReadBytes() int64 {
	return int64(b.off)
}

// skipBytes advances past exactly n bytes. It returns io.ErrUnexpectedEOF,
// leaving the offset unchanged, when fewer than n bytes remain or when n is
// negative (a negative count would otherwise move the offset backwards).
func (b *byteBuffer) skipBytes(n int) error {
	if n < 0 || n > len(b.buf)-b.off {
		return io.ErrUnexpectedEOF
	}

	b.off += n

	return nil
}

// reset makes the buffer read from buf, starting again at offset zero.
func (b *byteBuffer) reset(buf []byte) {
	b.buf = buf
	b.off = 0
}
// byteInputAdapter is a byteInput that reads from an io.Reader and counts
// the bytes it has consumed.
type byteInputAdapter struct {
	r         io.Reader // underlying stream
	readBytes int       // bytes read from r so far, including partial reads
}

// next reads exactly n bytes from the stream into a newly allocated slice
// and returns it. On a short read the error from io.ReadFull is returned
// (io.EOF if nothing was read, io.ErrUnexpectedEOF after a partial read) and
// the bytes that were read still count towards getReadBytes. A negative n
// can only come from a corrupt or overflowed length; it is reported as
// io.ErrUnexpectedEOF instead of panicking in make.
func (b *byteInputAdapter) next(n int) ([]byte, error) {
	if n < 0 {
		return nil, io.ErrUnexpectedEOF
	}

	buf := make([]byte, n)
	m, err := io.ReadFull(b.r, buf)
	b.readBytes += m

	if err != nil {
		return nil, err
	}

	return buf, nil
}

// readUInt32 reads the next 4 bytes as a little-endian uint32.
func (b *byteInputAdapter) readUInt32() (uint32, error) {
	buf, err := b.next(4)

	if err != nil {
		return 0, err
	}

	return binary.LittleEndian.Uint32(buf), nil
}

// readUInt16 reads the next 2 bytes as a little-endian uint16.
func (b *byteInputAdapter) readUInt16() (uint16, error) {
	buf, err := b.next(2)

	if err != nil {
		return 0, err
	}

	return binary.LittleEndian.Uint16(buf), nil
}

// getReadBytes returns the number of bytes read from the stream so far.
func (b *byteInputAdapter) getReadBytes() int64 {
	return int64(b.readBytes)
}

// skipBytes consumes exactly n bytes by reading them with next and
// discarding the result.
func (b *byteInputAdapter) skipBytes(n int) error {
	_, err := b.next(n)

	return err
}

// reset makes the adapter read from stream and zeroes the byte counter.
func (b *byteInputAdapter) reset(stream io.Reader) {
	b.r = stream
	b.readBytes = 0
}
@@ -0,0 +1,11 @@ | |||
// +build go1.9 | |||
// "go1.9", from Go version 1.9 onward | |||
// See https://golang.org/pkg/go/build/#hdr-Build_Constraints | |||
package roaring | |||
import "math/bits" | |||
// countLeadingZeros returns the number of zero bits above the most
// significant set bit of x; the result is 64 when x is 0.
func countLeadingZeros(x uint64) int {
	// bits.Len64 is the bit length of x, so the remaining upper bits of the
	// 64-bit word are exactly the leading zeros.
	return 64 - bits.Len64(x)
}
@@ -0,0 +1,36 @@ | |||
// +build !go1.9 | |||
package roaring | |||
// countLeadingZeros returns the number of consecutive most significant zero
// bits of i; the result is 64 when i is 0. It is the fallback used when
// math/bits is not available (pre-Go 1.9 builds).
func countLeadingZeros(i uint64) int {
	if i == 0 {
		return 64
	}
	// Binary search for the highest set bit: whenever the top `width` bits
	// are all zero, count them and shift them out, then halve the width.
	zeros := 0
	for width := uint(32); width > 0; width >>= 1 {
		if i>>(64-width) == 0 {
			zeros += int(width)
			i <<= width
		}
	}
	return zeros
}
@@ -0,0 +1,16 @@ | |||
module github.com/RoaringBitmap/roaring | |||
go 1.12 | |||
require ( | |||
github.com/glycerine/go-unsnap-stream v0.0.0-20181221182339-f9677308dec2 | |||
github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31 // indirect | |||
github.com/golang/snappy v0.0.1 // indirect | |||
github.com/gopherjs/gopherjs v0.0.0-20190910122728-9d188e94fb99 // indirect | |||
github.com/jtolds/gls v4.20.0+incompatible // indirect | |||
github.com/mschoch/smat v0.0.0-20160514031455-90eadee771ae | |||
github.com/philhofer/fwd v1.0.0 // indirect | |||
github.com/stretchr/testify v1.4.0 | |||
github.com/tinylib/msgp v1.1.0 | |||
github.com/willf/bitset v1.1.10 | |||
) |
@@ -0,0 +1,30 @@ | |||
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= | |||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= | |||
github.com/glycerine/go-unsnap-stream v0.0.0-20181221182339-f9677308dec2 h1:Ujru1hufTHVb++eG6OuNDKMxZnGIvF6o/u8q/8h2+I4= | |||
github.com/glycerine/go-unsnap-stream v0.0.0-20181221182339-f9677308dec2/go.mod h1:/20jfyN9Y5QPEAprSgKAUr+glWDY39ZiUEAYOEv5dsE= | |||
github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31 h1:gclg6gY70GLy3PbkQ1AERPfmLMMagS60DKF78eWwLn8= | |||
github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31/go.mod h1:Ogl1Tioa0aV7gstGFO7KhffUsb9M4ydbEbbxpcEDc24= | |||
github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4= | |||
github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= | |||
github.com/gopherjs/gopherjs v0.0.0-20190910122728-9d188e94fb99 h1:twflg0XRTjwKpxb/jFExr4HGq6on2dEOmnL6FV+fgPw= | |||
github.com/gopherjs/gopherjs v0.0.0-20190910122728-9d188e94fb99/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= | |||
github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo= | |||
github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= | |||
github.com/mschoch/smat v0.0.0-20160514031455-90eadee771ae h1:VeRdUYdCw49yizlSbMEn2SZ+gT+3IUKx8BqxyQdz+BY= | |||
github.com/mschoch/smat v0.0.0-20160514031455-90eadee771ae/go.mod h1:qAyveg+e4CE+eKJXWVjKXM4ck2QobLqTDytGJbLLhJg= | |||
github.com/philhofer/fwd v1.0.0 h1:UbZqGr5Y38ApvM/V/jEljVxwocdweyH+vmYvRPBnbqQ= | |||
github.com/philhofer/fwd v1.0.0/go.mod h1:gk3iGcWd9+svBvR0sR+KPcfE+RNWozjowpeBVG3ZVNU= | |||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= | |||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= | |||
github.com/stretchr/objx v0.1.0 h1:4G4v2dO3VZwixGIRoQ5Lfboy6nUhCyYzaqnIAPPhYs4= | |||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= | |||
github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk= | |||
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= | |||
github.com/tinylib/msgp v1.1.0 h1:9fQd+ICuRIu/ue4vxJZu6/LzxN0HwMds2nq/0cFvxHU= | |||
github.com/tinylib/msgp v1.1.0/go.mod h1:+d+yLhGm8mzTaHzB+wgMYrodPfmZrzkirds8fDWklFE= | |||
github.com/willf/bitset v1.1.10 h1:NotGKqX0KwQ72NUzqrjZq5ipPNDQex9lo3WpaS8L2sc= | |||
github.com/willf/bitset v1.1.10/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4= | |||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= | |||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= | |||
gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw= | |||
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= |
@@ -4,12 +4,7 @@ type manyIterable interface { | |||
nextMany(hs uint32, buf []uint32) int | |||
} | |||
type manyIterator struct { | |||
slice []uint16 | |||
loc int | |||
} | |||
func (si *manyIterator) nextMany(hs uint32, buf []uint32) int { | |||
func (si *shortIterator) nextMany(hs uint32, buf []uint32) int { | |||
n := 0 | |||
l := si.loc | |||
s := si.slice |
@@ -143,8 +143,8 @@ func toBitmapContainer(c container) container { | |||
func appenderRoutine(bitmapChan chan<- *Bitmap, resultChan <-chan keyedContainer, expectedKeysChan <-chan int) { | |||
expectedKeys := -1 | |||
appendedKeys := 0 | |||
keys := make([]uint16, 0) | |||
containers := make([]container, 0) | |||
var keys []uint16 | |||
var containers []container | |||
for appendedKeys != expectedKeys { | |||
select { | |||
case item := <-resultChan: | |||
@@ -337,7 +337,7 @@ func ParAnd(parallelism int, bitmaps ...*Bitmap) *Bitmap { | |||
// (if it is set to 0, a default number of workers is chosen) | |||
func ParOr(parallelism int, bitmaps ...*Bitmap) *Bitmap { | |||
var lKey uint16 = MaxUint16 | |||
var hKey uint16 = 0 | |||
var hKey uint16 | |||
bitmapsFiltered := bitmaps[:0] | |||
for _, b := range bitmaps { |
@@ -1,163 +0,0 @@ | |||
package roaring | |||
import ( | |||
"fmt" | |||
) | |||
// common to rle32.go and rle16.go | |||
// rleVerbose controls whether p() prints show up. | |||
// The testing package sets this based on | |||
// testing.Verbose(). | |||
var rleVerbose bool | |||
// p is a shorthand for fmt.Printf with beginning and | |||
// trailing newlines. p() makes it easy | |||
// to add diagnostic print statements. | |||
func p(format string, args ...interface{}) { | |||
if rleVerbose { | |||
fmt.Printf("\n"+format+"\n", args...) | |||
} | |||
} | |||
// MaxUint32 is the largest uint32 value. | |||
const MaxUint32 = 4294967295 | |||
// MaxUint16 is the largest 16 bit unsigned int. | |||
// This is the largest value an interval16 can store. | |||
const MaxUint16 = 65535 | |||
// searchOptions narrows the index window examined by runContainer32.search
// when the caller already knows bounds (mostly lower bounds). According to
// the original note it is used by Union and Intersect.
type searchOptions struct { | |||
// startIndex is the index at which to start searching, instead of 0.
startIndex int64 | |||
// endxIndex is the upper bound to use instead of len(rc.iv). A value of 0
// means the bound is ignored and n == len(rc.iv) is used, which is also
// the default for search() when opt is nil.
endxIndex int64 | |||
} | |||
// And finds the intersection of rc and b. | |||
func (rc *runContainer32) And(b *Bitmap) *Bitmap { | |||
out := NewBitmap() | |||
for _, p := range rc.iv { | |||
for i := p.start; i <= p.last; i++ { | |||
if b.Contains(i) { | |||
out.Add(i) | |||
} | |||
} | |||
} | |||
return out | |||
} | |||
// Xor returns the exclusive-or of rc and b. | |||
func (rc *runContainer32) Xor(b *Bitmap) *Bitmap { | |||
out := b.Clone() | |||
for _, p := range rc.iv { | |||
for v := p.start; v <= p.last; v++ { | |||
if out.Contains(v) { | |||
out.RemoveRange(uint64(v), uint64(v+1)) | |||
} else { | |||
out.Add(v) | |||
} | |||
} | |||
} | |||
return out | |||
} | |||
// Or returns the union of rc and b. | |||
func (rc *runContainer32) Or(b *Bitmap) *Bitmap { | |||
out := b.Clone() | |||
for _, p := range rc.iv { | |||
for v := p.start; v <= p.last; v++ { | |||
out.Add(v) | |||
} | |||
} | |||
return out | |||
} | |||
// trial holds the parameters of one randomized runContainer test.
// NOTE(review): the tests that read these fields are not visible here; the
// field notes below restate the original comments and should be confirmed
// against the test code.
type trial struct { | |||
n int | |||
percentFill float64 | |||
ntrial int | |||
// percentDelete: per the original notes, used only by the union test and
// the subtract test (the "union test" note may belong to a field that no
// longer exists; confirm).
percentDelete float64 | |||
// numRandomOpsPass: used only by the "067" randomized-operations test,
// which runs this many passes plus one.
numRandomOpsPass int | |||
// srang allows control of the sampling range; only the more recent tests
// respect it.
srang *interval16 | |||
} | |||
// And finds the intersection of rc and b. | |||
func (rc *runContainer16) And(b *Bitmap) *Bitmap { | |||
out := NewBitmap() | |||
for _, p := range rc.iv { | |||
plast := p.last() | |||
for i := p.start; i <= plast; i++ { | |||
if b.Contains(uint32(i)) { | |||
out.Add(uint32(i)) | |||
} | |||
} | |||
} | |||
return out | |||
} | |||
// Xor returns the exclusive-or of rc and b. | |||
func (rc *runContainer16) Xor(b *Bitmap) *Bitmap { | |||
out := b.Clone() | |||
for _, p := range rc.iv { | |||
plast := p.last() | |||
for v := p.start; v <= plast; v++ { | |||
w := uint32(v) | |||
if out.Contains(w) { | |||
out.RemoveRange(uint64(w), uint64(w+1)) | |||
} else { | |||
out.Add(w) | |||
} | |||
} | |||
} | |||
return out | |||
} | |||
// Or returns the union of rc and b. | |||
func (rc *runContainer16) Or(b *Bitmap) *Bitmap { | |||
out := b.Clone() | |||
for _, p := range rc.iv { | |||
plast := p.last() | |||
for v := p.start; v <= plast; v++ { | |||
out.Add(uint32(v)) | |||
} | |||
} | |||
return out | |||
} | |||
//func (rc *runContainer32) and(container) container { | |||
// panic("TODO. not yet implemented") | |||
//} | |||
// serializedSizeInBytes returns the number of bytes of memory | |||
// required by this runContainer16. This is for the | |||
// Roaring format, as specified https://github.com/RoaringBitmap/RoaringFormatSpec/ | |||
func (rc *runContainer16) serializedSizeInBytes() int { | |||
// number of runs in one uint16, then each run | |||
// needs two more uint16 | |||
return 2 + len(rc.iv)*4 | |||
} | |||
// serializedSizeInBytes returns the number of bytes of memory | |||
// required by this runContainer32. | |||
func (rc *runContainer32) serializedSizeInBytes() int { | |||
return 4 + len(rc.iv)*8 | |||
} |
@@ -1,695 +0,0 @@ | |||
package roaring | |||
/////////////////////////////////////////////////// | |||
// | |||
// container interface methods for runContainer16 | |||
// | |||
/////////////////////////////////////////////////// | |||
import ( | |||
"fmt" | |||
) | |||
// compile time verify we meet interface requirements | |||
var _ container = &runContainer16{} | |||
func (rc *runContainer16) clone() container { | |||
return newRunContainer16CopyIv(rc.iv) | |||
} | |||
func (rc *runContainer16) minimum() uint16 { | |||
return rc.iv[0].start // assume not empty | |||
} | |||
func (rc *runContainer16) maximum() uint16 { | |||
return rc.iv[len(rc.iv)-1].last() // assume not empty | |||
} | |||
func (rc *runContainer16) isFull() bool { | |||
return (len(rc.iv) == 1) && ((rc.iv[0].start == 0) && (rc.iv[0].last() == MaxUint16)) | |||
} | |||
func (rc *runContainer16) and(a container) container { | |||
if rc.isFull() { | |||
return a.clone() | |||
} | |||
switch c := a.(type) { | |||
case *runContainer16: | |||
return rc.intersect(c) | |||
case *arrayContainer: | |||
return rc.andArray(c) | |||
case *bitmapContainer: | |||
return rc.andBitmapContainer(c) | |||
} | |||
panic("unsupported container type") | |||
} | |||
func (rc *runContainer16) andCardinality(a container) int { | |||
switch c := a.(type) { | |||
case *runContainer16: | |||
return int(rc.intersectCardinality(c)) | |||
case *arrayContainer: | |||
return rc.andArrayCardinality(c) | |||
case *bitmapContainer: | |||
return rc.andBitmapContainerCardinality(c) | |||
} | |||
panic("unsupported container type") | |||
} | |||
// andBitmapContainer finds the intersection of rc and b. | |||
func (rc *runContainer16) andBitmapContainer(bc *bitmapContainer) container { | |||
bc2 := newBitmapContainerFromRun(rc) | |||
return bc2.andBitmap(bc) | |||
} | |||
func (rc *runContainer16) andArrayCardinality(ac *arrayContainer) int { | |||
pos := 0 | |||
answer := 0 | |||
maxpos := ac.getCardinality() | |||
if maxpos == 0 { | |||
return 0 // won't happen in actual code | |||
} | |||
v := ac.content[pos] | |||
mainloop: | |||
for _, p := range rc.iv { | |||
for v < p.start { | |||
pos++ | |||
if pos == maxpos { | |||
break mainloop | |||
} | |||
v = ac.content[pos] | |||
} | |||
for v <= p.last() { | |||
answer++ | |||
pos++ | |||
if pos == maxpos { | |||
break mainloop | |||
} | |||
v = ac.content[pos] | |||
} | |||
} | |||
return answer | |||
} | |||
func (rc *runContainer16) iand(a container) container { | |||
if rc.isFull() { | |||
return a.clone() | |||
} | |||
switch c := a.(type) { | |||
case *runContainer16: | |||
return rc.inplaceIntersect(c) | |||
case *arrayContainer: | |||
return rc.andArray(c) | |||
case *bitmapContainer: | |||
return rc.iandBitmapContainer(c) | |||
} | |||
panic("unsupported container type") | |||
} | |||
func (rc *runContainer16) inplaceIntersect(rc2 *runContainer16) container { | |||
// TODO: optimize by doing less allocation, possibly? | |||
// sect will be new | |||
sect := rc.intersect(rc2) | |||
*rc = *sect | |||
return rc | |||
} | |||
func (rc *runContainer16) iandBitmapContainer(bc *bitmapContainer) container { | |||
isect := rc.andBitmapContainer(bc) | |||
*rc = *newRunContainer16FromContainer(isect) | |||
return rc | |||
} | |||
func (rc *runContainer16) andArray(ac *arrayContainer) container { | |||
if len(rc.iv) == 0 { | |||
return newArrayContainer() | |||
} | |||
acCardinality := ac.getCardinality() | |||
c := newArrayContainerCapacity(acCardinality) | |||
for rlePos, arrayPos := 0, 0; arrayPos < acCardinality; { | |||
iv := rc.iv[rlePos] | |||
arrayVal := ac.content[arrayPos] | |||
for iv.last() < arrayVal { | |||
rlePos++ | |||
if rlePos == len(rc.iv) { | |||
return c | |||
} | |||
iv = rc.iv[rlePos] | |||
} | |||
if iv.start > arrayVal { | |||
arrayPos = advanceUntil(ac.content, arrayPos, len(ac.content), iv.start) | |||
} else { | |||
c.content = append(c.content, arrayVal) | |||
arrayPos++ | |||
} | |||
} | |||
return c | |||
} | |||
func (rc *runContainer16) andNot(a container) container { | |||
switch c := a.(type) { | |||
case *arrayContainer: | |||
return rc.andNotArray(c) | |||
case *bitmapContainer: | |||
return rc.andNotBitmap(c) | |||
case *runContainer16: | |||
return rc.andNotRunContainer16(c) | |||
} | |||
panic("unsupported container type") | |||
} | |||
func (rc *runContainer16) fillLeastSignificant16bits(x []uint32, i int, mask uint32) { | |||
k := 0 | |||
var val int64 | |||
for _, p := range rc.iv { | |||
n := p.runlen() | |||
for j := int64(0); j < n; j++ { | |||
val = int64(p.start) + j | |||
x[k+i] = uint32(val) | mask | |||
k++ | |||
} | |||
} | |||
} | |||
func (rc *runContainer16) getShortIterator() shortIterable { | |||
return rc.newRunIterator16() | |||
} | |||
func (rc *runContainer16) getManyIterator() manyIterable { | |||
return rc.newManyRunIterator16() | |||
} | |||
// add the values in the range [firstOfRange, endx). endx | |||
// is still abe to express 2^16 because it is an int not an uint16. | |||
func (rc *runContainer16) iaddRange(firstOfRange, endx int) container { | |||
if firstOfRange >= endx { | |||
panic(fmt.Sprintf("invalid %v = endx >= firstOfRange", endx)) | |||
} | |||
addme := newRunContainer16TakeOwnership([]interval16{ | |||
{ | |||
start: uint16(firstOfRange), | |||
length: uint16(endx - 1 - firstOfRange), | |||
}, | |||
}) | |||
*rc = *rc.union(addme) | |||
return rc | |||
} | |||
// remove the values in the range [firstOfRange,endx) | |||
func (rc *runContainer16) iremoveRange(firstOfRange, endx int) container { | |||
if firstOfRange >= endx { | |||
panic(fmt.Sprintf("request to iremove empty set [%v, %v),"+ | |||
" nothing to do.", firstOfRange, endx)) | |||
//return rc | |||
} | |||
x := newInterval16Range(uint16(firstOfRange), uint16(endx-1)) | |||
rc.isubtract(x) | |||
return rc | |||
} | |||
// not flip the values in the range [firstOfRange,endx) | |||
func (rc *runContainer16) not(firstOfRange, endx int) container { | |||
if firstOfRange >= endx { | |||
panic(fmt.Sprintf("invalid %v = endx >= firstOfRange = %v", endx, firstOfRange)) | |||
} | |||
return rc.Not(firstOfRange, endx) | |||
} | |||
// Not flips the values in the range [firstOfRange,endx). | |||
// This is not inplace. Only the returned value has the flipped bits. | |||
// | |||
// Currently implemented as (!A intersect B) union (A minus B), | |||
// where A is rc, and B is the supplied [firstOfRange, endx) interval. | |||
// | |||
// TODO(time optimization): convert this to a single pass | |||
// algorithm by copying AndNotRunContainer16() and modifying it. | |||
// Current routine is correct but | |||
// makes 2 more passes through the arrays than should be | |||
// strictly necessary. Measure both ways though--this may not matter. | |||
// | |||
func (rc *runContainer16) Not(firstOfRange, endx int) *runContainer16 { | |||
if firstOfRange >= endx { | |||
panic(fmt.Sprintf("invalid %v = endx >= firstOfRange == %v", endx, firstOfRange)) | |||
} | |||
if firstOfRange >= endx { | |||
return rc.Clone() | |||
} | |||
a := rc | |||
// algo: | |||
// (!A intersect B) union (A minus B) | |||
nota := a.invert() | |||
bs := []interval16{newInterval16Range(uint16(firstOfRange), uint16(endx-1))} | |||
b := newRunContainer16TakeOwnership(bs) | |||
notAintersectB := nota.intersect(b) | |||
aMinusB := a.AndNotRunContainer16(b) | |||
rc2 := notAintersectB.union(aMinusB) | |||
return rc2 | |||
} | |||
// equals is now logical equals; it does not require the | |||
// same underlying container type. | |||
func (rc *runContainer16) equals(o container) bool { | |||
srb, ok := o.(*runContainer16) | |||
if !ok { | |||
// maybe value instead of pointer | |||
val, valok := o.(*runContainer16) | |||
if valok { | |||
srb = val | |||
ok = true | |||
} | |||
} | |||
if ok { | |||
// Check if the containers are the same object. | |||
if rc == srb { | |||
return true | |||
} | |||
if len(srb.iv) != len(rc.iv) { | |||
return false | |||
} | |||
for i, v := range rc.iv { | |||
if v != srb.iv[i] { | |||
return false | |||
} | |||
} | |||
return true | |||
} | |||
// use generic comparison | |||
if o.getCardinality() != rc.getCardinality() { | |||
return false | |||
} | |||
rit := rc.getShortIterator() | |||
bit := o.getShortIterator() | |||
//k := 0 | |||
for rit.hasNext() { | |||
if bit.next() != rit.next() { | |||
return false | |||
} | |||
//k++ | |||
} | |||
return true | |||
} | |||
func (rc *runContainer16) iaddReturnMinimized(x uint16) container { | |||
rc.Add(x) | |||
return rc | |||
} | |||
func (rc *runContainer16) iadd(x uint16) (wasNew bool) { | |||
return rc.Add(x) | |||
} | |||
func (rc *runContainer16) iremoveReturnMinimized(x uint16) container { | |||
rc.removeKey(x) | |||
return rc | |||
} | |||
func (rc *runContainer16) iremove(x uint16) bool { | |||
return rc.removeKey(x) | |||
} | |||
func (rc *runContainer16) or(a container) container { | |||
if rc.isFull() { | |||
return rc.clone() | |||
} | |||
switch c := a.(type) { | |||
case *runContainer16: | |||
return rc.union(c) | |||
case *arrayContainer: | |||
return rc.orArray(c) | |||
case *bitmapContainer: | |||
return rc.orBitmapContainer(c) | |||
} | |||
panic("unsupported container type") | |||
} | |||
func (rc *runContainer16) orCardinality(a container) int { | |||
switch c := a.(type) { | |||
case *runContainer16: | |||
return int(rc.unionCardinality(c)) | |||
case *arrayContainer: | |||
return rc.orArrayCardinality(c) | |||
case *bitmapContainer: | |||
return rc.orBitmapContainerCardinality(c) | |||
} | |||
panic("unsupported container type") | |||
} | |||
// orBitmapContainer finds the union of rc and bc. | |||
func (rc *runContainer16) orBitmapContainer(bc *bitmapContainer) container { | |||
bc2 := newBitmapContainerFromRun(rc) | |||
return bc2.iorBitmap(bc) | |||
} | |||
func (rc *runContainer16) andBitmapContainerCardinality(bc *bitmapContainer) int { | |||
answer := 0 | |||
for i := range rc.iv { | |||
answer += bc.getCardinalityInRange(uint(rc.iv[i].start), uint(rc.iv[i].last())+1) | |||
} | |||
//bc.computeCardinality() | |||
return answer | |||
} | |||
func (rc *runContainer16) orBitmapContainerCardinality(bc *bitmapContainer) int { | |||
return rc.getCardinality() + bc.getCardinality() - rc.andBitmapContainerCardinality(bc) | |||
} | |||
// orArray finds the union of rc and ac. | |||
func (rc *runContainer16) orArray(ac *arrayContainer) container { | |||
bc1 := newBitmapContainerFromRun(rc) | |||
bc2 := ac.toBitmapContainer() | |||
return bc1.orBitmap(bc2) | |||
} | |||
// orArray finds the union of rc and ac. | |||
func (rc *runContainer16) orArrayCardinality(ac *arrayContainer) int { | |||
return ac.getCardinality() + rc.getCardinality() - rc.andArrayCardinality(ac) | |||
} | |||
func (rc *runContainer16) ior(a container) container { | |||
if rc.isFull() { | |||
return rc | |||
} | |||
switch c := a.(type) { | |||
case *runContainer16: | |||
return rc.inplaceUnion(c) | |||
case *arrayContainer: | |||
return rc.iorArray(c) | |||
case *bitmapContainer: | |||
return rc.iorBitmapContainer(c) | |||
} | |||
panic("unsupported container type") | |||
} | |||
func (rc *runContainer16) inplaceUnion(rc2 *runContainer16) container { | |||
p("rc.inplaceUnion with len(rc2.iv)=%v", len(rc2.iv)) | |||
for _, p := range rc2.iv { | |||
last := int64(p.last()) | |||
for i := int64(p.start); i <= last; i++ { | |||
rc.Add(uint16(i)) | |||
} | |||
} | |||
return rc | |||
} | |||
func (rc *runContainer16) iorBitmapContainer(bc *bitmapContainer) container { | |||
it := bc.getShortIterator() | |||
for it.hasNext() { | |||
rc.Add(it.next()) | |||
} | |||
return rc | |||
} | |||
func (rc *runContainer16) iorArray(ac *arrayContainer) container { | |||
it := ac.getShortIterator() | |||
for it.hasNext() { | |||
rc.Add(it.next()) | |||
} | |||
return rc | |||
} | |||
// lazyIOR is described (not yet implemented) in | |||
// this nice note from @lemire on | |||
// https://github.com/RoaringBitmap/roaring/pull/70#issuecomment-263613737 | |||
// | |||
// Description of lazyOR and lazyIOR from @lemire: | |||
// | |||
// Lazy functions are optional and can be simply | |||
// wrapper around non-lazy functions. | |||
// | |||
// The idea of "laziness" is as follows. It is | |||
// inspired by the concept of lazy evaluation | |||
// you might be familiar with (functional programming | |||
// and all that). So a roaring bitmap is | |||
// such that all its containers are, in some | |||
// sense, chosen to use as little memory as | |||
// possible. This is nice. Also, all bitsets | |||
// are "cardinality aware" so that you can do | |||
// fast rank/select queries, or query the | |||
// cardinality of the whole bitmap... very fast, | |||
// without latency. | |||
// | |||
// However, imagine that you are aggregating 100 | |||
// bitmaps together. So you OR the first two, then OR | |||
// that with the third one and so forth. Clearly, | |||
// intermediate bitmaps don't need to be as | |||
// compressed as possible, right? They can be | |||
// in a "dirty state". You only need the end | |||
// result to be in a nice state... which you | |||
// can achieve by calling repairAfterLazy at the end. | |||
// | |||
// The Java/C code does something special for | |||
// the in-place lazy OR runs. The idea is that | |||
// instead of taking two run containers and | |||
// generating a new one, we actually try to | |||
// do the computation in-place through a | |||
// technique invented by @gssiyankai (pinging him!). | |||
// What you do is you check whether the host | |||
// run container has lots of extra capacity. | |||
// If it does, you move its data at the end of | |||
// the backing array, and then you write | |||
// the answer at the beginning. What this | |||
// trick does is minimize memory allocations. | |||
// | |||
func (rc *runContainer16) lazyIOR(a container) container { | |||
// not lazy at the moment | |||
// TODO: make it lazy | |||
return rc.ior(a) | |||
/* | |||
switch c := a.(type) { | |||
case *arrayContainer: | |||
return rc.lazyIorArray(c) | |||
case *bitmapContainer: | |||
return rc.lazyIorBitmap(c) | |||
case *runContainer16: | |||
return rc.lazyIorRun16(c) | |||
} | |||
panic("unsupported container type") | |||
*/ | |||
} | |||
// lazyOR is described above in lazyIOR. | |||
func (rc *runContainer16) lazyOR(a container) container { | |||
// not lazy at the moment | |||
// TODO: make it lazy | |||
return rc.or(a) | |||
/* | |||
switch c := a.(type) { | |||
case *arrayContainer: | |||
return rc.lazyOrArray(c) | |||
case *bitmapContainer: | |||
return rc.lazyOrBitmap(c) | |||
case *runContainer16: | |||
return rc.lazyOrRunContainer16(c) | |||
} | |||
panic("unsupported container type") | |||
*/ | |||
} | |||
func (rc *runContainer16) intersects(a container) bool { | |||
// TODO: optimize by doing inplace/less allocation, possibly? | |||
isect := rc.and(a) | |||
return isect.getCardinality() > 0 | |||
} | |||
func (rc *runContainer16) xor(a container) container { | |||
switch c := a.(type) { | |||
case *arrayContainer: | |||
return rc.xorArray(c) | |||
case *bitmapContainer: | |||
return rc.xorBitmap(c) | |||
case *runContainer16: | |||
return rc.xorRunContainer16(c) | |||
} | |||
panic("unsupported container type") | |||
} | |||
func (rc *runContainer16) iandNot(a container) container { | |||
switch c := a.(type) { | |||
case *arrayContainer: | |||
return rc.iandNotArray(c) | |||
case *bitmapContainer: | |||
return rc.iandNotBitmap(c) | |||
case *runContainer16: | |||
return rc.iandNotRunContainer16(c) | |||
} | |||
panic("unsupported container type") | |||
} | |||
// flip the values in the range [firstOfRange,endx) | |||
func (rc *runContainer16) inot(firstOfRange, endx int) container { | |||
if firstOfRange >= endx { | |||
panic(fmt.Sprintf("invalid %v = endx >= firstOfRange = %v", endx, firstOfRange)) | |||
} | |||
// TODO: minimize copies, do it all inplace; not() makes a copy. | |||
rc = rc.Not(firstOfRange, endx) | |||
return rc | |||
} | |||
func (rc *runContainer16) getCardinality() int { | |||
return int(rc.cardinality()) | |||
} | |||
func (rc *runContainer16) rank(x uint16) int { | |||
n := int64(len(rc.iv)) | |||
xx := int64(x) | |||
w, already, _ := rc.search(xx, nil) | |||
if w < 0 { | |||
return 0 | |||
} | |||
if !already && w == n-1 { | |||
return rc.getCardinality() | |||
} | |||
var rnk int64 | |||
if !already { | |||
for i := int64(0); i <= w; i++ { | |||
rnk += rc.iv[i].runlen() | |||
} | |||
return int(rnk) | |||
} | |||
for i := int64(0); i < w; i++ { | |||
rnk += rc.iv[i].runlen() | |||
} | |||
rnk += int64(x-rc.iv[w].start) + 1 | |||
return int(rnk) | |||
} | |||
func (rc *runContainer16) selectInt(x uint16) int { | |||
return rc.selectInt16(x) | |||
} | |||
func (rc *runContainer16) andNotRunContainer16(b *runContainer16) container { | |||
return rc.AndNotRunContainer16(b) | |||
} | |||
func (rc *runContainer16) andNotArray(ac *arrayContainer) container { | |||
rcb := rc.toBitmapContainer() | |||
acb := ac.toBitmapContainer() | |||
return rcb.andNotBitmap(acb) | |||
} | |||
func (rc *runContainer16) andNotBitmap(bc *bitmapContainer) container { | |||
rcb := rc.toBitmapContainer() | |||
return rcb.andNotBitmap(bc) | |||
} | |||
func (rc *runContainer16) toBitmapContainer() *bitmapContainer { | |||
p("run16 toBitmap starting; rc has %v ranges", len(rc.iv)) | |||
bc := newBitmapContainer() | |||
for i := range rc.iv { | |||
bc.iaddRange(int(rc.iv[i].start), int(rc.iv[i].last())+1) | |||
} | |||
bc.computeCardinality() | |||
return bc | |||
} | |||
func (rc *runContainer16) iandNotRunContainer16(x2 *runContainer16) container { | |||
rcb := rc.toBitmapContainer() | |||
x2b := x2.toBitmapContainer() | |||
rcb.iandNotBitmapSurely(x2b) | |||
// TODO: check size and optimize the return value | |||
// TODO: is inplace modification really required? If not, elide the copy. | |||
rc2 := newRunContainer16FromBitmapContainer(rcb) | |||
*rc = *rc2 | |||
return rc | |||
} | |||
func (rc *runContainer16) iandNotArray(ac *arrayContainer) container { | |||
rcb := rc.toBitmapContainer() | |||
acb := ac.toBitmapContainer() | |||
rcb.iandNotBitmapSurely(acb) | |||
// TODO: check size and optimize the return value | |||
// TODO: is inplace modification really required? If not, elide the copy. | |||
rc2 := newRunContainer16FromBitmapContainer(rcb) | |||
*rc = *rc2 | |||
return rc | |||
} | |||
func (rc *runContainer16) iandNotBitmap(bc *bitmapContainer) container { | |||
rcb := rc.toBitmapContainer() | |||
rcb.iandNotBitmapSurely(bc) | |||
// TODO: check size and optimize the return value | |||
// TODO: is inplace modification really required? If not, elide the copy. | |||
rc2 := newRunContainer16FromBitmapContainer(rcb) | |||
*rc = *rc2 | |||
return rc | |||
} | |||
func (rc *runContainer16) xorRunContainer16(x2 *runContainer16) container { | |||
rcb := rc.toBitmapContainer() | |||
x2b := x2.toBitmapContainer() | |||
return rcb.xorBitmap(x2b) | |||
} | |||
func (rc *runContainer16) xorArray(ac *arrayContainer) container { | |||
rcb := rc.toBitmapContainer() | |||
acb := ac.toBitmapContainer() | |||
return rcb.xorBitmap(acb) | |||
} | |||
func (rc *runContainer16) xorBitmap(bc *bitmapContainer) container { | |||
rcb := rc.toBitmapContainer() | |||
return rcb.xorBitmap(bc) | |||
} | |||
// convert to bitmap or array *if needed* | |||
func (rc *runContainer16) toEfficientContainer() container { | |||
// runContainer16SerializedSizeInBytes(numRuns) | |||
sizeAsRunContainer := rc.getSizeInBytes() | |||
sizeAsBitmapContainer := bitmapContainerSizeInBytes() | |||
card := int(rc.cardinality()) | |||
sizeAsArrayContainer := arrayContainerSizeInBytes(card) | |||
if sizeAsRunContainer <= minOfInt(sizeAsBitmapContainer, sizeAsArrayContainer) { | |||
return rc | |||
} | |||
if card <= arrayDefaultMaxSize { | |||
return rc.toArrayContainer() | |||
} | |||
bc := newBitmapContainerFromRun(rc) | |||
return bc | |||
} | |||
func (rc *runContainer16) toArrayContainer() *arrayContainer { | |||
ac := newArrayContainer() | |||
for i := range rc.iv { | |||
ac.iaddRange(int(rc.iv[i].start), int(rc.iv[i].last())+1) | |||
} | |||
return ac | |||
} | |||
func newRunContainer16FromContainer(c container) *runContainer16 { | |||
switch x := c.(type) { | |||
case *runContainer16: | |||
return x.Clone() | |||
case *arrayContainer: | |||
return newRunContainer16FromArray(x) | |||
case *bitmapContainer: | |||
return newRunContainer16FromBitmapContainer(x) | |||
} | |||
panic("unsupported container type") | |||
} |
@@ -6,12 +6,12 @@ | |||
package roaring | |||
import ( | |||
"bufio" | |||
"bytes" | |||
"encoding/base64" | |||
"fmt" | |||
"io" | |||
"strconv" | |||
"sync" | |||
) | |||
// Bitmap represents a compressed bitmap where you can add integers. | |||
@@ -52,7 +52,7 @@ func (rb *Bitmap) ToBytes() ([]byte, error) { | |||
return rb.highlowcontainer.toBytes() | |||
} | |||
// WriteToMsgpack writes a msgpack2/snappy-streaming compressed serialized | |||
// Deprecated: WriteToMsgpack writes a msgpack2/snappy-streaming compressed serialized | |||
// version of this bitmap to stream. The format is not | |||
// compatible with the WriteTo() format, and is | |||
// experimental: it may produce smaller on disk | |||
@@ -67,8 +67,14 @@ func (rb *Bitmap) WriteToMsgpack(stream io.Writer) (int64, error) { | |||
// The format is compatible with other RoaringBitmap | |||
// implementations (Java, C) and is documented here: | |||
// https://github.com/RoaringBitmap/RoaringFormatSpec | |||
func (rb *Bitmap) ReadFrom(stream io.Reader) (int64, error) { | |||
return rb.highlowcontainer.readFrom(stream) | |||
func (rb *Bitmap) ReadFrom(reader io.Reader) (p int64, err error) { | |||
stream := byteInputAdapterPool.Get().(*byteInputAdapter) | |||
stream.reset(reader) | |||
p, err = rb.highlowcontainer.readFrom(stream) | |||
byteInputAdapterPool.Put(stream) | |||
return | |||
} | |||
// FromBuffer creates a bitmap from its serialized version stored in buffer | |||
@@ -87,10 +93,36 @@ func (rb *Bitmap) ReadFrom(stream io.Reader) (int64, error) { | |||
// You should *not* change the copy-on-write status of the resulting | |||
// bitmaps (SetCopyOnWrite). | |||
// | |||
func (rb *Bitmap) FromBuffer(buf []byte) (int64, error) { | |||
return rb.highlowcontainer.fromBuffer(buf) | |||
// If buf becomes unavailable, then a bitmap created with | |||
// FromBuffer would be effectively broken. Furthermore, any | |||
// bitmap derived from this bitmap (e.g., via Or, And) might | |||
// also be broken. Thus, before making buf unavailable, you should | |||
// call CloneCopyOnWriteContainers on all such bitmaps. | |||
// | |||
func (rb *Bitmap) FromBuffer(buf []byte) (p int64, err error) { | |||
stream := byteBufferPool.Get().(*byteBuffer) | |||
stream.reset(buf) | |||
p, err = rb.highlowcontainer.readFrom(stream) | |||
byteBufferPool.Put(stream) | |||
return | |||
} | |||
var ( | |||
byteBufferPool = sync.Pool{ | |||
New: func() interface{} { | |||
return &byteBuffer{} | |||
}, | |||
} | |||
byteInputAdapterPool = sync.Pool{ | |||
New: func() interface{} { | |||
return &byteInputAdapter{} | |||
}, | |||
} | |||
) | |||
// RunOptimize attempts to further compress the runs of consecutive values found in the bitmap | |||
func (rb *Bitmap) RunOptimize() { | |||
rb.highlowcontainer.runOptimize() | |||
@@ -101,7 +133,7 @@ func (rb *Bitmap) HasRunCompression() bool { | |||
return rb.highlowcontainer.hasRunCompression() | |||
} | |||
// ReadFromMsgpack reads a msgpack2/snappy-streaming serialized | |||
// Deprecated: ReadFromMsgpack reads a msgpack2/snappy-streaming serialized | |||
// version of this bitmap from stream. The format is | |||
// expected is that written by the WriteToMsgpack() | |||
// call; see additional notes there. | |||
@@ -110,29 +142,15 @@ func (rb *Bitmap) ReadFromMsgpack(stream io.Reader) (int64, error) { | |||
} | |||
// MarshalBinary implements the encoding.BinaryMarshaler interface for the bitmap | |||
// (same as ToBytes) | |||
func (rb *Bitmap) MarshalBinary() ([]byte, error) { | |||
var buf bytes.Buffer | |||
writer := bufio.NewWriter(&buf) | |||
_, err := rb.WriteTo(writer) | |||
if err != nil { | |||
return nil, err | |||
} | |||
err = writer.Flush() | |||
if err != nil { | |||
return nil, err | |||
} | |||
return buf.Bytes(), nil | |||
return rb.ToBytes() | |||
} | |||
// UnmarshalBinary implements the encoding.BinaryUnmarshaler interface for the bitmap | |||
func (rb *Bitmap) UnmarshalBinary(data []byte) error { | |||
var buf bytes.Buffer | |||
_, err := buf.Write(data) | |||
if err != nil { | |||
return err | |||
} | |||
reader := bufio.NewReader(&buf) | |||
_, err = rb.ReadFrom(reader) | |||
r := bytes.NewReader(data) | |||
_, err := rb.ReadFrom(r) | |||
return err | |||
} | |||
@@ -215,10 +233,20 @@ type IntIterable interface { | |||
Next() uint32 | |||
} | |||
// IntPeekable allows you to look at the next value without advancing and | |||
// advance as long as the next value is smaller than minval | |||
type IntPeekable interface { | |||
IntIterable | |||
// PeekNext peeks the next value without advancing the iterator | |||
PeekNext() uint32 | |||
// AdvanceIfNeeded advances as long as the next value is smaller than minval | |||
AdvanceIfNeeded(minval uint32) | |||
} | |||
type intIterator struct { | |||
pos int | |||
hs uint32 | |||
iter shortIterable | |||
iter shortPeekable | |||
highlowcontainer *roaringArray | |||
} | |||
@@ -244,6 +272,30 @@ func (ii *intIterator) Next() uint32 { | |||
return x | |||
} | |||
// PeekNext peeks the next value without advancing the iterator | |||
func (ii *intIterator) PeekNext() uint32 { | |||
return uint32(ii.iter.peekNext()&maxLowBit) | ii.hs | |||
} | |||
// AdvanceIfNeeded advances as long as the next value is smaller than minval | |||
func (ii *intIterator) AdvanceIfNeeded(minval uint32) { | |||
to := minval >> 16 | |||
for ii.HasNext() && (ii.hs>>16) < to { | |||
ii.pos++ | |||
ii.init() | |||
} | |||
if ii.HasNext() && (ii.hs>>16) == to { | |||
ii.iter.advanceIfNeeded(lowbits(minval)) | |||
if !ii.iter.hasNext() { | |||
ii.pos++ | |||
ii.init() | |||
} | |||
} | |||
} | |||
func newIntIterator(a *Bitmap) *intIterator { | |||
p := new(intIterator) | |||
p.pos = 0 | |||
@@ -252,6 +304,45 @@ func newIntIterator(a *Bitmap) *intIterator { | |||
return p | |||
} | |||
type intReverseIterator struct { | |||
pos int | |||
hs uint32 | |||
iter shortIterable | |||
highlowcontainer *roaringArray | |||
} | |||
// HasNext returns true if there are more integers to iterate over | |||
func (ii *intReverseIterator) HasNext() bool { | |||
return ii.pos >= 0 | |||
} | |||
func (ii *intReverseIterator) init() { | |||
if ii.pos >= 0 { | |||
ii.iter = ii.highlowcontainer.getContainerAtIndex(ii.pos).getReverseIterator() | |||
ii.hs = uint32(ii.highlowcontainer.getKeyAtIndex(ii.pos)) << 16 | |||
} else { | |||
ii.iter = nil | |||
} | |||
} | |||
// Next returns the next integer | |||
func (ii *intReverseIterator) Next() uint32 { | |||
x := uint32(ii.iter.next()) | ii.hs | |||
if !ii.iter.hasNext() { | |||
ii.pos = ii.pos - 1 | |||
ii.init() | |||
} | |||
return x | |||
} | |||
func newIntReverseIterator(a *Bitmap) *intReverseIterator { | |||
p := new(intReverseIterator) | |||
p.highlowcontainer = &a.highlowcontainer | |||
p.pos = a.highlowcontainer.size() - 1 | |||
p.init() | |||
return p | |||
} | |||
// ManyIntIterable allows you to iterate over the values in a Bitmap | |||
type ManyIntIterable interface { | |||
// pass in a buffer to fill up with values, returns how many values were returned | |||
@@ -325,12 +416,20 @@ func (rb *Bitmap) String() string { | |||
return buffer.String() | |||
} | |||
// Iterator creates a new IntIterable to iterate over the integers contained in the bitmap, in sorted order | |||
func (rb *Bitmap) Iterator() IntIterable { | |||
// Iterator creates a new IntPeekable to iterate over the integers contained in the bitmap, in sorted order; | |||
// the iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove). | |||
func (rb *Bitmap) Iterator() IntPeekable { | |||
return newIntIterator(rb) | |||
} | |||
// Iterator creates a new ManyIntIterable to iterate over the integers contained in the bitmap, in sorted order | |||
// ReverseIterator creates a new IntIterable to iterate over the integers contained in the bitmap, in sorted order; | |||
// the iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove). | |||
func (rb *Bitmap) ReverseIterator() IntIterable { | |||
return newIntReverseIterator(rb) | |||
} | |||
// ManyIterator creates a new ManyIntIterable to iterate over the integers contained in the bitmap, in sorted order; | |||
// the iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove). | |||
func (rb *Bitmap) ManyIterator() ManyIntIterable { | |||
return newManyIntIterator(rb) | |||
} | |||
@@ -374,6 +473,46 @@ func (rb *Bitmap) Equals(o interface{}) bool { | |||
return false | |||
} | |||
// AddOffset adds the value 'offset' to each and every value in a bitmap, generating a new bitmap in the process | |||
func AddOffset(x *Bitmap, offset uint32) (answer *Bitmap) { | |||
containerOffset := highbits(offset) | |||
inOffset := lowbits(offset) | |||
if inOffset == 0 { | |||
answer = x.Clone() | |||
for pos := 0; pos < answer.highlowcontainer.size(); pos++ { | |||
key := answer.highlowcontainer.getKeyAtIndex(pos) | |||
key += containerOffset | |||
answer.highlowcontainer.keys[pos] = key | |||
} | |||
} else { | |||
answer = New() | |||
for pos := 0; pos < x.highlowcontainer.size(); pos++ { | |||
key := x.highlowcontainer.getKeyAtIndex(pos) | |||
key += containerOffset | |||
c := x.highlowcontainer.getContainerAtIndex(pos) | |||
offsetted := c.addOffset(inOffset) | |||
if offsetted[0].getCardinality() > 0 { | |||
curSize := answer.highlowcontainer.size() | |||
lastkey := uint16(0) | |||
if curSize > 0 { | |||
lastkey = answer.highlowcontainer.getKeyAtIndex(curSize - 1) | |||
} | |||
if curSize > 0 && lastkey == key { | |||
prev := answer.highlowcontainer.getContainerAtIndex(curSize - 1) | |||
orrseult := prev.ior(offsetted[0]) | |||
answer.highlowcontainer.setContainerAtIndex(curSize-1, orrseult) | |||
} else { | |||
answer.highlowcontainer.appendContainer(key, offsetted[0], false) | |||
} | |||
} | |||
if offsetted[1].getCardinality() > 0 { | |||
answer.highlowcontainer.appendContainer(key+1, offsetted[1], false) | |||
} | |||
} | |||
} | |||
return answer | |||
} | |||
// Add the integer x to the bitmap | |||
func (rb *Bitmap) Add(x uint32) { | |||
hb := highbits(x) | |||
@@ -794,11 +933,6 @@ main: | |||
} | |||
} | |||
/*func (rb *Bitmap) Or(x2 *Bitmap) { | |||
results := Or(rb, x2) // Todo: could be computed in-place for reduced memory usage | |||
rb.highlowcontainer = results.highlowcontainer | |||
}*/ | |||
// AndNot computes the difference between two bitmaps and stores the result in the current bitmap | |||
func (rb *Bitmap) AndNot(x2 *Bitmap) { | |||
pos1 := 0 | |||
@@ -1086,10 +1220,10 @@ func (rb *Bitmap) Flip(rangeStart, rangeEnd uint64) { | |||
return | |||
} | |||
hbStart := highbits(uint32(rangeStart)) | |||
lbStart := lowbits(uint32(rangeStart)) | |||
hbLast := highbits(uint32(rangeEnd - 1)) | |||
lbLast := lowbits(uint32(rangeEnd - 1)) | |||
hbStart := uint32(highbits(uint32(rangeStart))) | |||
lbStart := uint32(lowbits(uint32(rangeStart))) | |||
hbLast := uint32(highbits(uint32(rangeEnd - 1))) | |||
lbLast := uint32(lowbits(uint32(rangeEnd - 1))) | |||
var max uint32 = maxLowBit | |||
for hb := hbStart; hb <= hbLast; hb++ { | |||
@@ -1102,7 +1236,7 @@ func (rb *Bitmap) Flip(rangeStart, rangeEnd uint64) { | |||
containerLast = uint32(lbLast) | |||
} | |||
i := rb.highlowcontainer.getIndex(hb) | |||
i := rb.highlowcontainer.getIndex(uint16(hb)) | |||
if i >= 0 { | |||
c := rb.highlowcontainer.getWritableContainerAtIndex(i).inot(int(containerStart), int(containerLast)+1) | |||
@@ -1113,7 +1247,7 @@ func (rb *Bitmap) Flip(rangeStart, rangeEnd uint64) { | |||
} | |||
} else { // *think* the range of ones must never be | |||
// empty. | |||
rb.highlowcontainer.insertNewKeyValueAt(-i-1, hb, rangeOfOnes(int(containerStart), int(containerLast))) | |||
rb.highlowcontainer.insertNewKeyValueAt(-i-1, uint16(hb), rangeOfOnes(int(containerStart), int(containerLast))) | |||
} | |||
} | |||
} | |||
@@ -1139,24 +1273,24 @@ func (rb *Bitmap) AddRange(rangeStart, rangeEnd uint64) { | |||
lbLast := uint32(lowbits(uint32(rangeEnd - 1))) | |||
var max uint32 = maxLowBit | |||
for hb := uint16(hbStart); hb <= uint16(hbLast); hb++ { | |||
for hb := hbStart; hb <= hbLast; hb++ { | |||
containerStart := uint32(0) | |||
if hb == uint16(hbStart) { | |||
if hb == hbStart { | |||
containerStart = lbStart | |||
} | |||
containerLast := max | |||
if hb == uint16(hbLast) { | |||
if hb == hbLast { | |||
containerLast = lbLast | |||
} | |||
i := rb.highlowcontainer.getIndex(hb) | |||
i := rb.highlowcontainer.getIndex(uint16(hb)) | |||
if i >= 0 { | |||
c := rb.highlowcontainer.getWritableContainerAtIndex(i).iaddRange(int(containerStart), int(containerLast)+1) | |||
rb.highlowcontainer.setContainerAtIndex(i, c) | |||
} else { // *think* the range of ones must never be | |||
// empty. | |||
rb.highlowcontainer.insertNewKeyValueAt(-i-1, hb, rangeOfOnes(int(containerStart), int(containerLast))) | |||
rb.highlowcontainer.insertNewKeyValueAt(-i-1, uint16(hb), rangeOfOnes(int(containerStart), int(containerLast))) | |||
} | |||
} | |||
} | |||
@@ -1243,13 +1377,13 @@ func Flip(bm *Bitmap, rangeStart, rangeEnd uint64) *Bitmap { | |||
} | |||
answer := NewBitmap() | |||
hbStart := highbits(uint32(rangeStart)) | |||
lbStart := lowbits(uint32(rangeStart)) | |||
hbLast := highbits(uint32(rangeEnd - 1)) | |||
lbLast := lowbits(uint32(rangeEnd - 1)) | |||
hbStart := uint32(highbits(uint32(rangeStart))) | |||
lbStart := uint32(lowbits(uint32(rangeStart))) | |||
hbLast := uint32(highbits(uint32(rangeEnd - 1))) | |||
lbLast := uint32(lowbits(uint32(rangeEnd - 1))) | |||
// copy the containers before the active area | |||
answer.highlowcontainer.appendCopiesUntil(bm.highlowcontainer, hbStart) | |||
answer.highlowcontainer.appendCopiesUntil(bm.highlowcontainer, uint16(hbStart)) | |||
var max uint32 = maxLowBit | |||
for hb := hbStart; hb <= hbLast; hb++ { | |||
@@ -1262,23 +1396,23 @@ func Flip(bm *Bitmap, rangeStart, rangeEnd uint64) *Bitmap { | |||
containerLast = uint32(lbLast) | |||
} | |||
i := bm.highlowcontainer.getIndex(hb) | |||
j := answer.highlowcontainer.getIndex(hb) | |||
i := bm.highlowcontainer.getIndex(uint16(hb)) | |||
j := answer.highlowcontainer.getIndex(uint16(hb)) | |||
if i >= 0 { | |||
c := bm.highlowcontainer.getContainerAtIndex(i).not(int(containerStart), int(containerLast)+1) | |||
if c.getCardinality() > 0 { | |||
answer.highlowcontainer.insertNewKeyValueAt(-j-1, hb, c) | |||
answer.highlowcontainer.insertNewKeyValueAt(-j-1, uint16(hb), c) | |||
} | |||
} else { // *think* the range of ones must never be | |||
// empty. | |||
answer.highlowcontainer.insertNewKeyValueAt(-j-1, hb, | |||
answer.highlowcontainer.insertNewKeyValueAt(-j-1, uint16(hb), | |||
rangeOfOnes(int(containerStart), int(containerLast))) | |||
} | |||
} | |||
// copy the containers after the active area. | |||
answer.highlowcontainer.appendCopiesAfter(bm.highlowcontainer, hbLast) | |||
answer.highlowcontainer.appendCopiesAfter(bm.highlowcontainer, uint16(hbLast)) | |||
return answer | |||
} | |||
@@ -1296,6 +1430,21 @@ func (rb *Bitmap) GetCopyOnWrite() (val bool) { | |||
return rb.highlowcontainer.copyOnWrite | |||
} | |||
// CloneCopyOnWriteContainers clones all containers which have | |||
// needCopyOnWrite set to true. | |||
// This can be used to make sure it is safe to munmap a []byte | |||
// that the roaring array may still have a reference to, after | |||
// calling FromBuffer. | |||
// More generally this function is useful if you call FromBuffer | |||
// to construct a bitmap with a backing array buf | |||
// and then later discard the buf array. Note that you should call | |||
// CloneCopyOnWriteContainers on all bitmaps that were derived | |||
// from the 'FromBuffer' bitmap since they may have dependencies | |||
// on the buf array as well. | |||
func (rb *Bitmap) CloneCopyOnWriteContainers() { | |||
// Delegates to highlowcontainer, which performs the actual cloning. | |||
rb.highlowcontainer.cloneCopyOnWriteContainers() | |||
} | |||
// FlipInt calls Flip after casting the parameters (convenience method) | |||
func FlipInt(bm *Bitmap, rangeStart, rangeEnd int) *Bitmap { | |||
return Flip(bm, uint64(rangeStart), uint64(rangeEnd)) |
@@ -4,16 +4,16 @@ import ( | |||
"bytes" | |||
"encoding/binary" | |||
"fmt" | |||
"io" | |||
"io/ioutil" | |||
snappy "github.com/glycerine/go-unsnap-stream" | |||
"github.com/tinylib/msgp/msgp" | |||
"io" | |||
) | |||
//go:generate msgp -unexported | |||
type container interface { | |||
addOffset(uint16) []container | |||
clone() container | |||
and(container) container | |||
andCardinality(container) int | |||
@@ -37,7 +37,8 @@ type container interface { | |||
not(start, final int) container // range is [firstOfRange,lastOfRange) | |||
inot(firstOfRange, endx int) container // i stands for inplace, range is [firstOfRange,endx) | |||
xor(r container) container | |||
getShortIterator() shortIterable | |||
getShortIterator() shortPeekable | |||
getReverseIterator() shortIterable | |||
getManyIterator() manyIterable | |||
contains(i uint16) bool | |||
maximum() uint16 | |||
@@ -61,7 +62,6 @@ type container interface { | |||
iremoveRange(start, final int) container // i stands for inplace, range is [firstOfRange,lastOfRange) | |||
selectInt(x uint16) int // selectInt returns the xth integer in the container | |||
serializedSizeInBytes() int | |||
readFrom(io.Reader) (int, error) | |||
writeTo(io.Writer) (int, error) | |||
numberOfRuns() int | |||
@@ -280,6 +280,18 @@ func (ra *roaringArray) clone() *roaringArray { | |||
return &sa | |||
} | |||
// cloneCopyOnWriteContainers replaces every container whose | |||
// needCopyOnWrite flag is true with a clone of itself and then | |||
// sets that flag to false. | |||
// This can be used to make sure it is safe to munmap a []byte | |||
// that the roaring array may still have a reference to. | |||
func (ra *roaringArray) cloneCopyOnWriteContainers() { | |||
for i, needCopyOnWrite := range ra.needCopyOnWrite { | |||
if needCopyOnWrite { | |||
// Swap the clone into the same slot, then clear the flag for it. | |||
ra.containers[i] = ra.containers[i].clone() | |||
ra.needCopyOnWrite[i] = false | |||
} | |||
} | |||
} | |||
// unused function: | |||
//func (ra *roaringArray) containsKey(x uint16) bool { | |||
// return (ra.binarySearch(0, int64(len(ra.keys)), x) >= 0) | |||
@@ -456,8 +468,7 @@ func (ra *roaringArray) serializedSizeInBytes() uint64 { | |||
// | |||
// spec: https://github.com/RoaringBitmap/RoaringFormatSpec | |||
// | |||
func (ra *roaringArray) toBytes() ([]byte, error) { | |||
stream := &bytes.Buffer{} | |||
func (ra *roaringArray) writeTo(w io.Writer) (n int64, err error) { | |||
hasRun := ra.hasRunCompression() | |||
isRunSizeInBytes := 0 | |||
cookieSize := 8 | |||
@@ -522,79 +533,77 @@ func (ra *roaringArray) toBytes() ([]byte, error) { | |||
} | |||
} | |||
_, err := stream.Write(buf[:nw]) | |||
written, err := w.Write(buf[:nw]) | |||
if err != nil { | |||
return nil, err | |||
return n, err | |||
} | |||
for i, c := range ra.containers { | |||
_ = i | |||
_, err := c.writeTo(stream) | |||
n += int64(written) | |||
for _, c := range ra.containers { | |||
written, err := c.writeTo(w) | |||
if err != nil { | |||
return nil, err | |||
return n, err | |||
} | |||
n += int64(written) | |||
} | |||
return stream.Bytes(), nil | |||
return n, nil | |||
} | |||
// | |||
// spec: https://github.com/RoaringBitmap/RoaringFormatSpec | |||
// | |||
func (ra *roaringArray) writeTo(out io.Writer) (int64, error) { | |||
by, err := ra.toBytes() | |||
if err != nil { | |||
return 0, err | |||
} | |||
n, err := out.Write(by) | |||
if err == nil && n < len(by) { | |||
err = io.ErrShortWrite | |||
} | |||
return int64(n), err | |||
func (ra *roaringArray) toBytes() ([]byte, error) { | |||
var buf bytes.Buffer | |||
_, err := ra.writeTo(&buf) | |||
return buf.Bytes(), err | |||
} | |||
func (ra *roaringArray) fromBuffer(buf []byte) (int64, error) { | |||
pos := 0 | |||
if len(buf) < 8 { | |||
return 0, fmt.Errorf("buffer too small, expecting at least 8 bytes, was %d", len(buf)) | |||
func (ra *roaringArray) readFrom(stream byteInput) (int64, error) { | |||
cookie, err := stream.readUInt32() | |||
if err != nil { | |||
return stream.getReadBytes(), fmt.Errorf("error in roaringArray.readFrom: could not read initial cookie: %s", err) | |||
} | |||
cookie := binary.LittleEndian.Uint32(buf) | |||
pos += 4 | |||
var size uint32 // number of containers | |||
haveRunContainers := false | |||
var size uint32 | |||
var isRunBitmap []byte | |||
// cookie header | |||
if cookie&0x0000FFFF == serialCookie { | |||
haveRunContainers = true | |||
size = uint32(uint16(cookie>>16) + 1) // number of containers | |||
size = uint32(uint16(cookie>>16) + 1) | |||
// create is-run-container bitmap | |||
isRunBitmapSize := (int(size) + 7) / 8 | |||
if pos+isRunBitmapSize > len(buf) { | |||
return 0, fmt.Errorf("malformed bitmap, is-run bitmap overruns buffer at %d", pos+isRunBitmapSize) | |||
} | |||
isRunBitmap, err = stream.next(isRunBitmapSize) | |||
isRunBitmap = buf[pos : pos+isRunBitmapSize] | |||
pos += isRunBitmapSize | |||
if err != nil { | |||
return stream.getReadBytes(), fmt.Errorf("malformed bitmap, failed to read is-run bitmap, got: %s", err) | |||
} | |||
} else if cookie == serialCookieNoRunContainer { | |||
size = binary.LittleEndian.Uint32(buf[pos:]) | |||
pos += 4 | |||
size, err = stream.readUInt32() | |||
if err != nil { | |||
return stream.getReadBytes(), fmt.Errorf("malformed bitmap, failed to read a bitmap size: %s", err) | |||
} | |||
} else { | |||
return 0, fmt.Errorf("error in roaringArray.readFrom: did not find expected serialCookie in header") | |||
return stream.getReadBytes(), fmt.Errorf("error in roaringArray.readFrom: did not find expected serialCookie in header") | |||
} | |||
if size > (1 << 16) { | |||
return 0, fmt.Errorf("It is logically impossible to have more than (1<<16) containers.") | |||
return stream.getReadBytes(), fmt.Errorf("it is logically impossible to have more than (1<<16) containers") | |||
} | |||
// descriptive header | |||
// keycard - is {key, cardinality} tuple slice | |||
if pos+2*2*int(size) > len(buf) { | |||
return 0, fmt.Errorf("malfomred bitmap, key-cardinality slice overruns buffer at %d", pos+2*2*int(size)) | |||
buf, err := stream.next(2 * 2 * int(size)) | |||
if err != nil { | |||
return stream.getReadBytes(), fmt.Errorf("failed to read descriptive header: %s", err) | |||
} | |||
keycard := byteSliceAsUint16Slice(buf[pos : pos+2*2*int(size)]) | |||
pos += 2 * 2 * int(size) | |||
if !haveRunContainers || size >= noOffsetThreshold { | |||
pos += 4 * int(size) | |||
keycard := byteSliceAsUint16Slice(buf) | |||
if isRunBitmap == nil || size >= noOffsetThreshold { | |||
if err := stream.skipBytes(int(size) * 4); err != nil { | |||
return stream.getReadBytes(), fmt.Errorf("failed to skip bytes: %s", err) | |||
} | |||
} | |||
// Allocate slices upfront as number of containers is known | |||
@@ -603,11 +612,13 @@ func (ra *roaringArray) fromBuffer(buf []byte) (int64, error) { | |||
} else { | |||
ra.containers = make([]container, size) | |||
} | |||
if cap(ra.keys) >= int(size) { | |||
ra.keys = ra.keys[:size] | |||
} else { | |||
ra.keys = make([]uint16, size) | |||
} | |||
if cap(ra.needCopyOnWrite) >= int(size) { | |||
ra.needCopyOnWrite = ra.needCopyOnWrite[:size] | |||
} else { | |||
@@ -615,129 +626,62 @@ func (ra *roaringArray) fromBuffer(buf []byte) (int64, error) { | |||
} | |||
for i := uint32(0); i < size; i++ { | |||
key := uint16(keycard[2*i]) | |||
key := keycard[2*i] | |||
card := int(keycard[2*i+1]) + 1 | |||
ra.keys[i] = key | |||
ra.needCopyOnWrite[i] = true | |||
if haveRunContainers && isRunBitmap[i/8]&(1<<(i%8)) != 0 { | |||
if isRunBitmap != nil && isRunBitmap[i/8]&(1<<(i%8)) != 0 { | |||
// run container | |||
nr := binary.LittleEndian.Uint16(buf[pos:]) | |||
pos += 2 | |||
if pos+int(nr)*4 > len(buf) { | |||
return 0, fmt.Errorf("malformed bitmap, a run container overruns buffer at %d:%d", pos, pos+int(nr)*4) | |||
nr, err := stream.readUInt16() | |||
if err != nil { | |||
return 0, fmt.Errorf("failed to read runtime container size: %s", err) | |||
} | |||
buf, err := stream.next(int(nr) * 4) | |||
if err != nil { | |||
return stream.getReadBytes(), fmt.Errorf("failed to read runtime container content: %s", err) | |||
} | |||
nb := runContainer16{ | |||
iv: byteSliceAsInterval16Slice(buf[pos : pos+int(nr)*4]), | |||
iv: byteSliceAsInterval16Slice(buf), | |||
card: int64(card), | |||
} | |||
pos += int(nr) * 4 | |||
ra.containers[i] = &nb | |||
} else if card > arrayDefaultMaxSize { | |||
// bitmap container | |||
buf, err := stream.next(arrayDefaultMaxSize * 2) | |||
if err != nil { | |||
return stream.getReadBytes(), fmt.Errorf("failed to read bitmap container: %s", err) | |||
} | |||
nb := bitmapContainer{ | |||
cardinality: card, | |||
bitmap: byteSliceAsUint64Slice(buf[pos : pos+arrayDefaultMaxSize*2]), | |||
bitmap: byteSliceAsUint64Slice(buf), | |||
} | |||
pos += arrayDefaultMaxSize * 2 | |||
ra.containers[i] = &nb | |||
} else { | |||
// array container | |||
nb := arrayContainer{ | |||
byteSliceAsUint16Slice(buf[pos : pos+card*2]), | |||
} | |||
pos += card * 2 | |||
ra.containers[i] = &nb | |||
} | |||
} | |||
return int64(pos), nil | |||
} | |||
buf, err := stream.next(card * 2) | |||
func (ra *roaringArray) readFrom(stream io.Reader) (int64, error) { | |||
pos := 0 | |||
var cookie uint32 | |||
err := binary.Read(stream, binary.LittleEndian, &cookie) | |||
if err != nil { | |||
return 0, fmt.Errorf("error in roaringArray.readFrom: could not read initial cookie: %s", err) | |||
} | |||
pos += 4 | |||
var size uint32 | |||
haveRunContainers := false | |||
var isRun *bitmapContainer | |||
if cookie&0x0000FFFF == serialCookie { | |||
haveRunContainers = true | |||
size = uint32(uint16(cookie>>16) + 1) | |||
bytesToRead := (int(size) + 7) / 8 | |||
numwords := (bytesToRead + 7) / 8 | |||
by := make([]byte, bytesToRead, numwords*8) | |||
nr, err := io.ReadFull(stream, by) | |||
if err != nil { | |||
return 8 + int64(nr), fmt.Errorf("error in readFrom: could not read the "+ | |||
"runContainer bit flags of length %v bytes: %v", bytesToRead, err) | |||
} | |||
pos += bytesToRead | |||
by = by[:cap(by)] | |||
isRun = newBitmapContainer() | |||
for i := 0; i < numwords; i++ { | |||
isRun.bitmap[i] = binary.LittleEndian.Uint64(by) | |||
by = by[8:] | |||
} | |||
} else if cookie == serialCookieNoRunContainer { | |||
err = binary.Read(stream, binary.LittleEndian, &size) | |||
if err != nil { | |||
return 0, fmt.Errorf("error in roaringArray.readFrom: when reading size, got: %s", err) | |||
} | |||
pos += 4 | |||
} else { | |||
return 0, fmt.Errorf("error in roaringArray.readFrom: did not find expected serialCookie in header") | |||
} | |||
if size > (1 << 16) { | |||
return 0, fmt.Errorf("It is logically impossible to have more than (1<<16) containers.") | |||
} | |||
// descriptive header | |||
keycard := make([]uint16, 2*size, 2*size) | |||
err = binary.Read(stream, binary.LittleEndian, keycard) | |||
if err != nil { | |||
return 0, err | |||
} | |||
pos += 2 * 2 * int(size) | |||
// offset header | |||
if !haveRunContainers || size >= noOffsetThreshold { | |||
io.CopyN(ioutil.Discard, stream, 4*int64(size)) // we never skip ahead so this data can be ignored | |||
pos += 4 * int(size) | |||
} | |||
for i := uint32(0); i < size; i++ { | |||
key := int(keycard[2*i]) | |||
card := int(keycard[2*i+1]) + 1 | |||
if haveRunContainers && isRun.contains(uint16(i)) { | |||
nb := newRunContainer16() | |||
nr, err := nb.readFrom(stream) | |||
if err != nil { | |||
return 0, err | |||
return stream.getReadBytes(), fmt.Errorf("failed to read array container: %s", err) | |||
} | |||
pos += nr | |||
ra.appendContainer(uint16(key), nb, false) | |||
} else if card > arrayDefaultMaxSize { | |||
nb := newBitmapContainer() | |||
nr, err := nb.readFrom(stream) | |||
if err != nil { | |||
return 0, err | |||
} | |||
nb.cardinality = card | |||
pos += nr | |||
ra.appendContainer(keycard[2*i], nb, false) | |||
} else { | |||
nb := newArrayContainerSize(card) | |||
nr, err := nb.readFrom(stream) | |||
if err != nil { | |||
return 0, err | |||
nb := arrayContainer{ | |||
byteSliceAsUint16Slice(buf), | |||
} | |||
pos += nr | |||
ra.appendContainer(keycard[2*i], nb, false) | |||
ra.containers[i] = &nb | |||
} | |||
} | |||
return int64(pos), nil | |||
return stream.getReadBytes(), nil | |||
} | |||
func (ra *roaringArray) hasRunCompression() bool { |
@@ -8,7 +8,7 @@ import ( | |||
"github.com/tinylib/msgp/msgp" | |||
) | |||
// DecodeMsg implements msgp.Decodable | |||
// Deprecated: DecodeMsg implements msgp.Decodable | |||
func (z *containerSerz) DecodeMsg(dc *msgp.Reader) (err error) { | |||
var field []byte | |||
_ = field | |||
@@ -48,7 +48,7 @@ func (z *containerSerz) DecodeMsg(dc *msgp.Reader) (err error) { | |||
return | |||
} | |||
// EncodeMsg implements msgp.Encodable | |||
// Deprecated: EncodeMsg implements msgp.Encodable | |||
func (z *containerSerz) EncodeMsg(en *msgp.Writer) (err error) { | |||
// map header, size 2 | |||
// write "t" | |||
@@ -72,7 +72,7 @@ func (z *containerSerz) EncodeMsg(en *msgp.Writer) (err error) { | |||
return | |||
} | |||
// MarshalMsg implements msgp.Marshaler | |||
// Deprecated: MarshalMsg implements msgp.Marshaler | |||
func (z *containerSerz) MarshalMsg(b []byte) (o []byte, err error) { | |||
o = msgp.Require(b, z.Msgsize()) | |||
// map header, size 2 | |||
@@ -88,7 +88,7 @@ func (z *containerSerz) MarshalMsg(b []byte) (o []byte, err error) { | |||
return | |||
} | |||
// UnmarshalMsg implements msgp.Unmarshaler | |||
// Deprecated: UnmarshalMsg implements msgp.Unmarshaler | |||
func (z *containerSerz) UnmarshalMsg(bts []byte) (o []byte, err error) { | |||
var field []byte | |||
_ = field | |||
@@ -129,13 +129,13 @@ func (z *containerSerz) UnmarshalMsg(bts []byte) (o []byte, err error) { | |||
return | |||
} | |||
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message | |||
// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message | |||
func (z *containerSerz) Msgsize() (s int) { | |||
s = 1 + 2 + msgp.Uint8Size + 2 + z.r.Msgsize() | |||
return | |||
} | |||
// DecodeMsg implements msgp.Decodable | |||
// Deprecated: DecodeMsg implements msgp.Decodable | |||
func (z *contype) DecodeMsg(dc *msgp.Reader) (err error) { | |||
{ | |||
var zajw uint8 | |||
@@ -148,7 +148,7 @@ func (z *contype) DecodeMsg(dc *msgp.Reader) (err error) { | |||
return | |||
} | |||
// EncodeMsg implements msgp.Encodable | |||
// Deprecated: EncodeMsg implements msgp.Encodable | |||
func (z contype) EncodeMsg(en *msgp.Writer) (err error) { | |||
err = en.WriteUint8(uint8(z)) | |||
if err != nil { | |||
@@ -157,14 +157,14 @@ func (z contype) EncodeMsg(en *msgp.Writer) (err error) { | |||
return | |||
} | |||
// MarshalMsg implements msgp.Marshaler | |||
// Deprecated: MarshalMsg implements msgp.Marshaler | |||
func (z contype) MarshalMsg(b []byte) (o []byte, err error) { | |||
o = msgp.Require(b, z.Msgsize()) | |||
o = msgp.AppendUint8(o, uint8(z)) | |||
return | |||
} | |||
// UnmarshalMsg implements msgp.Unmarshaler | |||
// Deprecated: UnmarshalMsg implements msgp.Unmarshaler | |||
func (z *contype) UnmarshalMsg(bts []byte) (o []byte, err error) { | |||
{ | |||
var zwht uint8 | |||
@@ -178,13 +178,13 @@ func (z *contype) UnmarshalMsg(bts []byte) (o []byte, err error) { | |||
return | |||
} | |||
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message | |||
// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message | |||
func (z contype) Msgsize() (s int) { | |||
s = msgp.Uint8Size | |||
return | |||
} | |||
// DecodeMsg implements msgp.Decodable | |||
// Deprecated: DecodeMsg implements msgp.Decodable | |||
func (z *roaringArray) DecodeMsg(dc *msgp.Reader) (err error) { | |||
var field []byte | |||
_ = field | |||
@@ -295,7 +295,7 @@ func (z *roaringArray) DecodeMsg(dc *msgp.Reader) (err error) { | |||
return | |||
} | |||
// EncodeMsg implements msgp.Encodable | |||
// Deprecated: EncodeMsg implements msgp.Encodable | |||
func (z *roaringArray) EncodeMsg(en *msgp.Writer) (err error) { | |||
// map header, size 4 | |||
// write "keys" | |||
@@ -370,7 +370,7 @@ func (z *roaringArray) EncodeMsg(en *msgp.Writer) (err error) { | |||
return | |||
} | |||
// MarshalMsg implements msgp.Marshaler | |||
// Deprecated: MarshalMsg implements msgp.Marshaler | |||
func (z *roaringArray) MarshalMsg(b []byte) (o []byte, err error) { | |||
o = msgp.Require(b, z.Msgsize()) | |||
// map header, size 4 | |||
@@ -407,7 +407,7 @@ func (z *roaringArray) MarshalMsg(b []byte) (o []byte, err error) { | |||
return | |||
} | |||
// UnmarshalMsg implements msgp.Unmarshaler | |||
// Deprecated: UnmarshalMsg implements msgp.Unmarshaler | |||
func (z *roaringArray) UnmarshalMsg(bts []byte) (o []byte, err error) { | |||
var field []byte | |||
_ = field | |||
@@ -519,7 +519,7 @@ func (z *roaringArray) UnmarshalMsg(bts []byte) (o []byte, err error) { | |||
return | |||
} | |||
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message | |||
// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message | |||
func (z *roaringArray) Msgsize() (s int) { | |||
s = 1 + 5 + msgp.ArrayHeaderSize + (len(z.keys) * (msgp.Uint16Size)) + 16 + msgp.ArrayHeaderSize + (len(z.needCopyOnWrite) * (msgp.BoolSize)) + 12 + msgp.BoolSize + 8 + msgp.ArrayHeaderSize | |||
for zxhx := range z.conserz { |
@@ -6,7 +6,7 @@ package roaring | |||
import "github.com/tinylib/msgp/msgp" | |||
// DecodeMsg implements msgp.Decodable | |||
// Deprecated: DecodeMsg implements msgp.Decodable | |||
func (z *addHelper16) DecodeMsg(dc *msgp.Reader) (err error) { | |||
var field []byte | |||
_ = field | |||
@@ -169,7 +169,7 @@ func (z *addHelper16) DecodeMsg(dc *msgp.Reader) (err error) { | |||
return | |||
} | |||
// EncodeMsg implements msgp.Encodable | |||
// Deprecated: EncodeMsg implements msgp.Encodable | |||
func (z *addHelper16) EncodeMsg(en *msgp.Writer) (err error) { | |||
// map header, size 5 | |||
// write "runstart" | |||
@@ -284,7 +284,7 @@ func (z *addHelper16) EncodeMsg(en *msgp.Writer) (err error) { | |||
return | |||
} | |||
// MarshalMsg implements msgp.Marshaler | |||
// Deprecated: MarshalMsg implements msgp.Marshaler | |||
func (z *addHelper16) MarshalMsg(b []byte) (o []byte, err error) { | |||
o = msgp.Require(b, z.Msgsize()) | |||
// map header, size 5 | |||
@@ -334,7 +334,7 @@ func (z *addHelper16) MarshalMsg(b []byte) (o []byte, err error) { | |||
return | |||
} | |||
// UnmarshalMsg implements msgp.Unmarshaler | |||
// Deprecated: UnmarshalMsg implements msgp.Unmarshaler | |||
func (z *addHelper16) UnmarshalMsg(bts []byte) (o []byte, err error) { | |||
var field []byte | |||
_ = field | |||
@@ -498,7 +498,7 @@ func (z *addHelper16) UnmarshalMsg(bts []byte) (o []byte, err error) { | |||
return | |||
} | |||
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message | |||
// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message | |||
func (z *addHelper16) Msgsize() (s int) { | |||
s = 1 + 9 + msgp.Uint16Size + 7 + msgp.Uint16Size + 14 + msgp.Uint16Size + 2 + msgp.ArrayHeaderSize + (len(z.m) * (12 + msgp.Uint16Size + msgp.Uint16Size)) + 3 | |||
if z.rc == nil { | |||
@@ -509,7 +509,7 @@ func (z *addHelper16) Msgsize() (s int) { | |||
return | |||
} | |||
// DecodeMsg implements msgp.Decodable | |||
// Deprecated: DecodeMsg implements msgp.Decodable | |||
func (z *interval16) DecodeMsg(dc *msgp.Reader) (err error) { | |||
var field []byte | |||
_ = field | |||
@@ -546,7 +546,7 @@ func (z *interval16) DecodeMsg(dc *msgp.Reader) (err error) { | |||
return | |||
} | |||
// EncodeMsg implements msgp.Encodable | |||
// Deprecated: EncodeMsg implements msgp.Encodable | |||
func (z interval16) EncodeMsg(en *msgp.Writer) (err error) { | |||
// map header, size 2 | |||
// write "start" | |||
@@ -570,7 +570,7 @@ func (z interval16) EncodeMsg(en *msgp.Writer) (err error) { | |||
return | |||
} | |||
// MarshalMsg implements msgp.Marshaler | |||
// Deprecated: MarshalMsg implements msgp.Marshaler | |||
func (z interval16) MarshalMsg(b []byte) (o []byte, err error) { | |||
o = msgp.Require(b, z.Msgsize()) | |||
// map header, size 2 | |||
@@ -583,7 +583,7 @@ func (z interval16) MarshalMsg(b []byte) (o []byte, err error) { | |||
return | |||
} | |||
// UnmarshalMsg implements msgp.Unmarshaler | |||
// Deprecated: UnmarshalMsg implements msgp.Unmarshaler | |||
func (z *interval16) UnmarshalMsg(bts []byte) (o []byte, err error) { | |||
var field []byte | |||
_ = field | |||
@@ -621,13 +621,13 @@ func (z *interval16) UnmarshalMsg(bts []byte) (o []byte, err error) { | |||
return | |||
} | |||
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message | |||
// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message | |||
func (z interval16) Msgsize() (s int) { | |||
s = 1 + 6 + msgp.Uint16Size + 5 + msgp.Uint16Size | |||
return | |||
} | |||
// DecodeMsg implements msgp.Decodable | |||
// Deprecated: DecodeMsg implements msgp.Decodable | |||
func (z *runContainer16) DecodeMsg(dc *msgp.Reader) (err error) { | |||
var field []byte | |||
_ = field | |||
@@ -701,7 +701,7 @@ func (z *runContainer16) DecodeMsg(dc *msgp.Reader) (err error) { | |||
return | |||
} | |||
// EncodeMsg implements msgp.Encodable | |||
// Deprecated: EncodeMsg implements msgp.Encodable | |||
func (z *runContainer16) EncodeMsg(en *msgp.Writer) (err error) { | |||
// map header, size 2 | |||
// write "iv" | |||
@@ -746,7 +746,7 @@ func (z *runContainer16) EncodeMsg(en *msgp.Writer) (err error) { | |||
return | |||
} | |||
// MarshalMsg implements msgp.Marshaler | |||
// Deprecated: MarshalMsg implements msgp.Marshaler | |||
func (z *runContainer16) MarshalMsg(b []byte) (o []byte, err error) { | |||
o = msgp.Require(b, z.Msgsize()) | |||
// map header, size 2 | |||
@@ -768,7 +768,7 @@ func (z *runContainer16) MarshalMsg(b []byte) (o []byte, err error) { | |||
return | |||
} | |||
// UnmarshalMsg implements msgp.Unmarshaler | |||
// Deprecated: UnmarshalMsg implements msgp.Unmarshaler | |||
func (z *runContainer16) UnmarshalMsg(bts []byte) (o []byte, err error) { | |||
var field []byte | |||
_ = field | |||
@@ -843,13 +843,13 @@ func (z *runContainer16) UnmarshalMsg(bts []byte) (o []byte, err error) { | |||
return | |||
} | |||
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message | |||
// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message | |||
func (z *runContainer16) Msgsize() (s int) { | |||
s = 1 + 3 + msgp.ArrayHeaderSize + (len(z.iv) * (12 + msgp.Uint16Size + msgp.Uint16Size)) + 5 + msgp.Int64Size | |||
return | |||
} | |||
// DecodeMsg implements msgp.Decodable | |||
// Deprecated: DecodeMsg implements msgp.Decodable | |||
func (z *runIterator16) DecodeMsg(dc *msgp.Reader) (err error) { | |||
var field []byte | |||
_ = field | |||
@@ -891,11 +891,6 @@ func (z *runIterator16) DecodeMsg(dc *msgp.Reader) (err error) { | |||
if err != nil { | |||
return | |||
} | |||
case "curSeq": | |||
z.curSeq, err = dc.ReadInt64() | |||
if err != nil { | |||
return | |||
} | |||
default: | |||
err = dc.Skip() | |||
if err != nil { | |||
@@ -906,11 +901,11 @@ func (z *runIterator16) DecodeMsg(dc *msgp.Reader) (err error) { | |||
return | |||
} | |||
// EncodeMsg implements msgp.Encodable | |||
// Deprecated: EncodeMsg implements msgp.Encodable | |||
func (z *runIterator16) EncodeMsg(en *msgp.Writer) (err error) { | |||
// map header, size 4 | |||
// map header, size 3 | |||
// write "rc" | |||
err = en.Append(0x84, 0xa2, 0x72, 0x63) | |||
err = en.Append(0x83, 0xa2, 0x72, 0x63) | |||
if err != nil { | |||
return err | |||
} | |||
@@ -943,24 +938,15 @@ func (z *runIterator16) EncodeMsg(en *msgp.Writer) (err error) { | |||
if err != nil { | |||
return | |||
} | |||
// write "curSeq" | |||
err = en.Append(0xa6, 0x63, 0x75, 0x72, 0x53, 0x65, 0x71) | |||
if err != nil { | |||
return err | |||
} | |||
err = en.WriteInt64(z.curSeq) | |||
if err != nil { | |||
return | |||
} | |||
return | |||
} | |||
// MarshalMsg implements msgp.Marshaler | |||
// Deprecated: MarshalMsg implements msgp.Marshaler | |||
func (z *runIterator16) MarshalMsg(b []byte) (o []byte, err error) { | |||
o = msgp.Require(b, z.Msgsize()) | |||
// map header, size 4 | |||
// map header, size 3 | |||
// string "rc" | |||
o = append(o, 0x84, 0xa2, 0x72, 0x63) | |||
o = append(o, 0x83, 0xa2, 0x72, 0x63) | |||
if z.rc == nil { | |||
o = msgp.AppendNil(o) | |||
} else { | |||
@@ -975,13 +961,10 @@ func (z *runIterator16) MarshalMsg(b []byte) (o []byte, err error) { | |||
// string "curPosInIndex" | |||
o = append(o, 0xad, 0x63, 0x75, 0x72, 0x50, 0x6f, 0x73, 0x49, 0x6e, 0x49, 0x6e, 0x64, 0x65, 0x78) | |||
o = msgp.AppendUint16(o, z.curPosInIndex) | |||
// string "curSeq" | |||
o = append(o, 0xa6, 0x63, 0x75, 0x72, 0x53, 0x65, 0x71) | |||
o = msgp.AppendInt64(o, z.curSeq) | |||
return | |||
} | |||
// UnmarshalMsg implements msgp.Unmarshaler | |||
// Deprecated: UnmarshalMsg implements msgp.Unmarshaler | |||
func (z *runIterator16) UnmarshalMsg(bts []byte) (o []byte, err error) { | |||
var field []byte | |||
_ = field | |||
@@ -1023,11 +1006,6 @@ func (z *runIterator16) UnmarshalMsg(bts []byte) (o []byte, err error) { | |||
if err != nil { | |||
return | |||
} | |||
case "curSeq": | |||
z.curSeq, bts, err = msgp.ReadInt64Bytes(bts) | |||
if err != nil { | |||
return | |||
} | |||
default: | |||
bts, err = msgp.Skip(bts) | |||
if err != nil { | |||
@@ -1039,7 +1017,7 @@ func (z *runIterator16) UnmarshalMsg(bts []byte) (o []byte, err error) { | |||
return | |||
} | |||
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message | |||
// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message | |||
func (z *runIterator16) Msgsize() (s int) { | |||
s = 1 + 3 | |||
if z.rc == nil { | |||
@@ -1047,11 +1025,11 @@ func (z *runIterator16) Msgsize() (s int) { | |||
} else { | |||
s += z.rc.Msgsize() | |||
} | |||
s += 9 + msgp.Int64Size + 14 + msgp.Uint16Size + 7 + msgp.Int64Size | |||
s += 9 + msgp.Int64Size + 14 + msgp.Uint16Size | |||
return | |||
} | |||
// DecodeMsg implements msgp.Decodable | |||
// Deprecated: DecodeMsg implements msgp.Decodable | |||
func (z *uint16Slice) DecodeMsg(dc *msgp.Reader) (err error) { | |||
var zjpj uint32 | |||
zjpj, err = dc.ReadArrayHeader() | |||
@@ -1072,7 +1050,7 @@ func (z *uint16Slice) DecodeMsg(dc *msgp.Reader) (err error) { | |||
return | |||
} | |||
// EncodeMsg implements msgp.Encodable | |||
// Deprecated: EncodeMsg implements msgp.Encodable | |||
func (z uint16Slice) EncodeMsg(en *msgp.Writer) (err error) { | |||
err = en.WriteArrayHeader(uint32(len(z))) | |||
if err != nil { | |||
@@ -1087,7 +1065,7 @@ func (z uint16Slice) EncodeMsg(en *msgp.Writer) (err error) { | |||
return | |||
} | |||
// MarshalMsg implements msgp.Marshaler | |||
// Deprecated: MarshalMsg implements msgp.Marshaler | |||
func (z uint16Slice) MarshalMsg(b []byte) (o []byte, err error) { | |||
o = msgp.Require(b, z.Msgsize()) | |||
o = msgp.AppendArrayHeader(o, uint32(len(z))) | |||
@@ -1097,7 +1075,7 @@ func (z uint16Slice) MarshalMsg(b []byte) (o []byte, err error) { | |||
return | |||
} | |||
// UnmarshalMsg implements msgp.Unmarshaler | |||
// Deprecated: UnmarshalMsg implements msgp.Unmarshaler | |||
func (z *uint16Slice) UnmarshalMsg(bts []byte) (o []byte, err error) { | |||
var zgmo uint32 | |||
zgmo, bts, err = msgp.ReadArrayHeaderBytes(bts) | |||
@@ -1119,7 +1097,7 @@ func (z *uint16Slice) UnmarshalMsg(bts []byte) (o []byte, err error) { | |||
return | |||
} | |||
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message | |||
// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message | |||
func (z uint16Slice) Msgsize() (s int) { | |||
s = msgp.ArrayHeaderSize + (len(z) * (msgp.Uint16Size)) | |||
return |
@@ -2,8 +2,6 @@ package roaring | |||
import ( | |||
"encoding/binary" | |||
"errors" | |||
"fmt" | |||
"io" | |||
"github.com/tinylib/msgp/msgp" | |||
@@ -22,14 +20,6 @@ func (b *runContainer16) writeTo(stream io.Writer) (int, error) { | |||
return stream.Write(buf) | |||
} | |||
func (b *runContainer32) writeToMsgpack(stream io.Writer) (int, error) { | |||
bts, err := b.MarshalMsg(nil) | |||
if err != nil { | |||
return 0, err | |||
} | |||
return stream.Write(bts) | |||
} | |||
func (b *runContainer16) writeToMsgpack(stream io.Writer) (int, error) { | |||
bts, err := b.MarshalMsg(nil) | |||
if err != nil { | |||
@@ -38,46 +28,7 @@ func (b *runContainer16) writeToMsgpack(stream io.Writer) (int, error) { | |||
return stream.Write(bts) | |||
} | |||
func (b *runContainer32) readFromMsgpack(stream io.Reader) (int, error) { | |||
err := msgp.Decode(stream, b) | |||
return 0, err | |||
} | |||
func (b *runContainer16) readFromMsgpack(stream io.Reader) (int, error) { | |||
err := msgp.Decode(stream, b) | |||
return 0, err | |||
} | |||
var errCorruptedStream = errors.New("insufficient/odd number of stored bytes, corrupted stream detected") | |||
func (b *runContainer16) readFrom(stream io.Reader) (int, error) { | |||
b.iv = b.iv[:0] | |||
b.card = 0 | |||
var numRuns uint16 | |||
err := binary.Read(stream, binary.LittleEndian, &numRuns) | |||
if err != nil { | |||
return 0, err | |||
} | |||
nr := int(numRuns) | |||
encRun := make([]uint16, 2*nr) | |||
by := make([]byte, 4*nr) | |||
err = binary.Read(stream, binary.LittleEndian, &by) | |||
if err != nil { | |||
return 0, err | |||
} | |||
for i := range encRun { | |||
if len(by) < 2 { | |||
return 0, errCorruptedStream | |||
} | |||
encRun[i] = binary.LittleEndian.Uint16(by) | |||
by = by[2:] | |||
} | |||
for i := 0; i < nr; i++ { | |||
if i > 0 && b.iv[i-1].last() >= encRun[i*2] { | |||
return 0, fmt.Errorf("error: stored runContainer had runs that were not in sorted order!! (b.iv[i-1=%v].last = %v >= encRun[i=%v] = %v)", i-1, b.iv[i-1].last(), i, encRun[i*2]) | |||
} | |||
b.iv = append(b.iv, interval16{start: encRun[i*2], length: encRun[i*2+1]}) | |||
b.card += int64(encRun[i*2+1]) + 1 | |||
} | |||
return 0, err | |||
} |
@@ -4,6 +4,7 @@ package roaring | |||
import ( | |||
"encoding/binary" | |||
"errors" | |||
"io" | |||
) | |||
@@ -26,6 +27,10 @@ func (b *arrayContainer) readFrom(stream io.Reader) (int, error) { | |||
} | |||
func (b *bitmapContainer) writeTo(stream io.Writer) (int, error) { | |||
if b.cardinality <= arrayDefaultMaxSize { | |||
return 0, errors.New("refusing to write bitmap container with cardinality of array container") | |||
} | |||
// Write set | |||
buf := make([]byte, 8*len(b.bitmap)) | |||
for i, v := range b.bitmap { | |||
@@ -69,6 +74,16 @@ func uint64SliceAsByteSlice(slice []uint64) []byte { | |||
return by | |||
} | |||
// uint16SliceAsByteSlice returns a freshly allocated byte slice holding every
// element of slice encoded as two little-endian bytes, in order. The result
// never aliases the input and is non-nil even for an empty or nil input.
func uint16SliceAsByteSlice(slice []uint16) []byte {
	encoded := make([]byte, 2*len(slice))
	for idx := range slice {
		off := 2 * idx
		binary.LittleEndian.PutUint16(encoded[off:off+2], slice[idx])
	}
	return encoded
}
func byteSliceAsUint16Slice(slice []byte) []uint16 { | |||
if len(slice)%2 != 0 { | |||
panic("Slice size should be divisible by 2") |
@@ -3,8 +3,10 @@ | |||
package roaring | |||
import ( | |||
"errors" | |||
"io" | |||
"reflect" | |||
"runtime" | |||
"unsafe" | |||
) | |||
@@ -14,26 +16,13 @@ func (ac *arrayContainer) writeTo(stream io.Writer) (int, error) { | |||
} | |||
func (bc *bitmapContainer) writeTo(stream io.Writer) (int, error) { | |||
if bc.cardinality <= arrayDefaultMaxSize { | |||
return 0, errors.New("refusing to write bitmap container with cardinality of array container") | |||
} | |||
buf := uint64SliceAsByteSlice(bc.bitmap) | |||
return stream.Write(buf) | |||
} | |||
// readFrom reads an arrayContainer from stream. | |||
// PRE-REQUISITE: you must size the arrayContainer correctly (allocate b.content) | |||
// *before* you call readFrom. We can't guess the size in the stream | |||
// by this point. | |||
func (ac *arrayContainer) readFrom(stream io.Reader) (int, error) { | |||
buf := uint16SliceAsByteSlice(ac.content) | |||
return io.ReadFull(stream, buf) | |||
} | |||
func (bc *bitmapContainer) readFrom(stream io.Reader) (int, error) { | |||
buf := uint64SliceAsByteSlice(bc.bitmap) | |||
n, err := io.ReadFull(stream, buf) | |||
bc.computeCardinality() | |||
return n, err | |||
} | |||
func uint64SliceAsByteSlice(slice []uint64) []byte { | |||
// make a new slice header | |||
header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice)) | |||
@@ -42,8 +31,12 @@ func uint64SliceAsByteSlice(slice []uint64) []byte { | |||
header.Len *= 8 | |||
header.Cap *= 8 | |||
// instantiate result and use KeepAlive so data isn't unmapped. | |||
result := *(*[]byte)(unsafe.Pointer(&header)) | |||
runtime.KeepAlive(&slice) | |||
// return it | |||
return *(*[]byte)(unsafe.Pointer(&header)) | |||
return result | |||
} | |||
func uint16SliceAsByteSlice(slice []uint16) []byte { | |||
@@ -54,8 +47,12 @@ func uint16SliceAsByteSlice(slice []uint16) []byte { | |||
header.Len *= 2 | |||
header.Cap *= 2 | |||
// instantiate result and use KeepAlive so data isn't unmapped. | |||
result := *(*[]byte)(unsafe.Pointer(&header)) | |||
runtime.KeepAlive(&slice) | |||
// return it | |||
return *(*[]byte)(unsafe.Pointer(&header)) | |||
return result | |||
} | |||
func (bc *bitmapContainer) asLittleEndianByteSlice() []byte { | |||
@@ -64,50 +61,74 @@ func (bc *bitmapContainer) asLittleEndianByteSlice() []byte { | |||
// Deserialization code follows | |||
func byteSliceAsUint16Slice(slice []byte) []uint16 { | |||
//// | |||
// These methods (byteSliceAsUint16Slice,...) do not make copies, | |||
// they are pointer-based (unsafe). The caller is responsible to | |||
// ensure that the input slice does not get garbage collected, deleted | |||
// or modified while you hold the returned slice. | |||
//// | |||
func byteSliceAsUint16Slice(slice []byte) (result []uint16) { // here we create a new slice holder | |||
if len(slice)%2 != 0 { | |||
panic("Slice size should be divisible by 2") | |||
} | |||
// reference: https://go101.org/article/unsafe.html | |||
// make a new slice header | |||
header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice)) | |||
bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice)) | |||
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result)) | |||
// update its capacity and length | |||
header.Len /= 2 | |||
header.Cap /= 2 | |||
// transfer the data from the given slice to a new variable (our result) | |||
rHeader.Data = bHeader.Data | |||
rHeader.Len = bHeader.Len / 2 | |||
rHeader.Cap = bHeader.Cap / 2 | |||
// return it | |||
return *(*[]uint16)(unsafe.Pointer(&header)) | |||
// instantiate result and use KeepAlive so data isn't unmapped. | |||
runtime.KeepAlive(&slice) // it is still crucial, GC can free it) | |||
// return result | |||
return | |||
} | |||
func byteSliceAsUint64Slice(slice []byte) []uint64 { | |||
func byteSliceAsUint64Slice(slice []byte) (result []uint64) { | |||
if len(slice)%8 != 0 { | |||
panic("Slice size should be divisible by 8") | |||
} | |||
// reference: https://go101.org/article/unsafe.html | |||
// make a new slice header | |||
header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice)) | |||
bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice)) | |||
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result)) | |||
// update its capacity and length | |||
header.Len /= 8 | |||
header.Cap /= 8 | |||
// transfer the data from the given slice to a new variable (our result) | |||
rHeader.Data = bHeader.Data | |||
rHeader.Len = bHeader.Len / 8 | |||
rHeader.Cap = bHeader.Cap / 8 | |||
// return it | |||
return *(*[]uint64)(unsafe.Pointer(&header)) | |||
// instantiate result and use KeepAlive so data isn't unmapped. | |||
runtime.KeepAlive(&slice) // it is still crucial, GC can free it) | |||
// return result | |||
return | |||
} | |||
func byteSliceAsInterval16Slice(slice []byte) []interval16 { | |||
func byteSliceAsInterval16Slice(slice []byte) (result []interval16) { | |||
if len(slice)%4 != 0 { | |||
panic("Slice size should be divisible by 4") | |||
} | |||
// reference: https://go101.org/article/unsafe.html | |||
// make a new slice header | |||
header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice)) | |||
bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice)) | |||
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result)) | |||
// update its capacity and length | |||
header.Len /= 4 | |||
header.Cap /= 4 | |||
// transfer the data from the given slice to a new variable (our result) | |||
rHeader.Data = bHeader.Data | |||
rHeader.Len = bHeader.Len / 4 | |||
rHeader.Cap = bHeader.Cap / 4 | |||
// return it | |||
return *(*[]interval16)(unsafe.Pointer(&header)) | |||
// instantiate result and use KeepAlive so data isn't unmapped. | |||
runtime.KeepAlive(&slice) // it is still crucial, GC can free it) | |||
// return result | |||
return | |||
} |
@@ -5,6 +5,12 @@ type shortIterable interface { | |||
next() uint16 | |||
} | |||
// shortPeekable is a shortIterable that additionally supports peekNext and | |||
// advanceIfNeeded. In the shortIterator implementation in this file, peekNext | |||
// returns the element at the current position without moving, and | |||
// advanceIfNeeded repositions the iterator using advanceUntil when the | |||
// element at the current position is smaller than minval. | |||
type shortPeekable interface { | |||
shortIterable | |||
peekNext() uint16 | |||
advanceIfNeeded(minval uint16) | |||
} | |||
type shortIterator struct { | |||
slice []uint16 | |||
loc int | |||
@@ -19,3 +25,28 @@ func (si *shortIterator) next() uint16 { | |||
si.loc++ | |||
return a | |||
} | |||
func (si *shortIterator) peekNext() uint16 { | |||
return si.slice[si.loc] | |||
} | |||
func (si *shortIterator) advanceIfNeeded(minval uint16) { | |||
if si.hasNext() && si.peekNext() < minval { | |||
si.loc = advanceUntil(si.slice, si.loc, len(si.slice), minval) | |||
} | |||
} | |||
// reverseIterator walks a []uint16 backwards. loc is the index of the element
// the next call to next returns; the iterator is exhausted once loc is
// negative.
type reverseIterator struct {
	slice []uint16
	loc   int
}

// hasNext reports whether loc is still a non-negative index.
func (si *reverseIterator) hasNext() bool {
	return !(si.loc < 0)
}

// next returns the element at loc and then moves loc one step towards the
// front of the slice. It panics if loc is out of range.
func (si *reverseIterator) next() uint16 {
	idx := si.loc
	val := si.slice[idx]
	si.loc = idx - 1
	return val
}
@@ -14,6 +14,17 @@ const ( | |||
serialCookie = 12347 // runs, arrays, and bitmaps | |||
noOffsetThreshold = 4 | |||
// MaxUint32 is the largest uint32 value. | |||
MaxUint32 = 4294967295 | |||
// MaxRange is one more than the maximum allowed bitmap bit index. It is intended for use as an | |||
// upper bound for ranges. | |||
MaxRange uint64 = MaxUint32 + 1 | |||
// MaxUint16 is the largest 16 bit unsigned int. | |||
// This is the largest value an interval16 can store. | |||
MaxUint16 = 65535 | |||
// Compute wordSizeInBytes, the size of a word in bytes. | |||
_m = ^uint64(0) | |||
_logS = _m>>8&1 + _m>>16&1 + _m>>32&1 | |||
@@ -114,7 +125,6 @@ func flipBitmapRange(bitmap []uint64, start int, end int) { | |||
endword := (end - 1) / 64 | |||
bitmap[firstword] ^= ^(^uint64(0) << uint(start%64)) | |||
for i := firstword; i < endword; i++ { | |||
//p("flipBitmapRange on i=%v", i) | |||
bitmap[i] = ^bitmap[i] | |||
} | |||
bitmap[endword] ^= ^uint64(0) >> (uint(-end) % 64) | |||
@@ -292,24 +302,3 @@ func minOfUint16(a, b uint16) uint16 { | |||
} | |||
return b | |||
} | |||
// maxInt returns the larger of a and b.
func maxInt(a, b int) int {
	if b >= a {
		return b
	}
	return a
}
// maxUint16 returns the larger of a and b.
func maxUint16(a, b uint16) uint16 {
	largest := b
	if a > b {
		largest = a
	}
	return largest
}
// minUint16 returns the smaller of a and b.
func minUint16(a, b uint16) uint16 {
	if b <= a {
		return b
	}
	return a
}
@@ -3,9 +3,9 @@ sudo: false | |||
language: go | |||
go: | |||
- "1.9.x" | |||
- "1.10.x" | |||
- "1.11.x" | |||
- "1.12.x" | |||
script: | |||
- go get golang.org/x/tools/cmd/cover | |||
@@ -15,7 +15,12 @@ script: | |||
- gvt restore | |||
- go test -race -v $(go list ./... | grep -v vendor/) | |||
- go vet $(go list ./... | grep -v vendor/) | |||
- errcheck -ignorepkg fmt $(go list ./... | grep -v vendor/) | |||
- go test ./test -v -indexType scorch | |||
- if [[ ${TRAVIS_GO_VERSION} =~ ^1\.10 ]]; then | |||
echo "errcheck skipped for go version" $TRAVIS_GO_VERSION; | |||
else | |||
errcheck -ignorepkg fmt $(go list ./... | grep -v vendor/); | |||
fi | |||
- docs/project-code-coverage.sh | |||
- docs/build_children.sh | |||
@@ -86,6 +86,10 @@ func (t *TextField) Analyze() (int, analysis.TokenFrequencies) { | |||
return fieldLength, tokenFreqs | |||
} | |||
// Analyzer returns the analyzer stored in this field's analyzer member. | |||
func (t *TextField) Analyzer() *analysis.Analyzer { | |||
return t.analyzer | |||
} | |||
func (t *TextField) Value() []byte { | |||
return t.value | |||
} |
@@ -37,6 +37,12 @@ var geoTolerance = 1E-6 | |||
var lonScale = float64((uint64(0x1)<<GeoBits)-1) / 360.0 | |||
var latScale = float64((uint64(0x1)<<GeoBits)-1) / 180.0 | |||
// Point represents a geo point. | |||
type Point struct { | |||
// Lon is the longitude; BoundingRectangleForPolygon validates it with | |||
// checkLongitude. Presumably in degrees — confirm. | |||
Lon float64 | |||
// Lat is the latitude; BoundingRectangleForPolygon validates it with | |||
// checkLatitude. Presumably in degrees — confirm. | |||
Lat float64 | |||
} | |||
// MortonHash computes the morton hash value for the provided geo point | |||
// This point is ordered as lon, lat. | |||
func MortonHash(lon, lat float64) uint64 { | |||
@@ -168,3 +174,35 @@ func checkLongitude(longitude float64) error { | |||
} | |||
return nil | |||
} | |||
func BoundingRectangleForPolygon(polygon []Point) ( | |||
float64, float64, float64, float64, error) { | |||
err := checkLongitude(polygon[0].Lon) | |||
if err != nil { | |||
return 0, 0, 0, 0, err | |||
} | |||
err = checkLatitude(polygon[0].Lat) | |||
if err != nil { | |||
return 0, 0, 0, 0, err | |||
} | |||
maxY, minY := polygon[0].Lat, polygon[0].Lat | |||
maxX, minX := polygon[0].Lon, polygon[0].Lon | |||
for i := 1; i < len(polygon); i++ { | |||
err := checkLongitude(polygon[i].Lon) | |||
if err != nil { | |||
return 0, 0, 0, 0, err | |||
} | |||
err = checkLatitude(polygon[i].Lat) | |||
if err != nil { | |||
return 0, 0, 0, 0, err | |||
} | |||
maxY = math.Max(maxY, polygon[i].Lat) | |||
minY = math.Min(minY, polygon[i].Lat) | |||
maxX = math.Max(maxX, polygon[i].Lon) | |||
minX = math.Min(minX, polygon[i].Lon) | |||
} | |||
return minX, maxY, maxX, minY, nil | |||
} |
@@ -1,32 +1,21 @@ | |||
// The code here was obtained from: | |||
// https://github.com/mmcloughlin/geohash | |||
// The MIT License (MIT) | |||
// Copyright (c) 2015 Michael McLoughlin | |||
// Permission is hereby granted, free of charge, to any person obtaining a copy | |||
// of this software and associated documentation files (the "Software"), to deal | |||
// in the Software without restriction, including without limitation the rights | |||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
// copies of the Software, and to permit persons to whom the Software is | |||
// furnished to do so, subject to the following conditions: | |||
// The above copyright notice and this permission notice shall be included in all | |||
// copies or substantial portions of the Software. | |||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
// SOFTWARE. | |||
// Copyright (c) 2019 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
// This implementation is inspired from the geohash-js | |||
// ref: https://github.com/davetroy/geohash-js | |||
package geo | |||
import ( | |||
"math" | |||
) | |||
// encoding encapsulates an encoding defined by a given base32 alphabet. | |||
type encoding struct { | |||
enc string | |||
@@ -47,128 +36,76 @@ func newEncoding(encoder string) *encoding { | |||
return e | |||
} | |||
// Decode string into bits of a 64-bit word. The string s may be at most 12 | |||
// characters. | |||
func (e *encoding) decode(s string) uint64 { | |||
x := uint64(0) | |||
for i := 0; i < len(s); i++ { | |||
x = (x << 5) | uint64(e.dec[s[i]]) | |||
} | |||
return x | |||
} | |||
// Encode bits of 64-bit word into a string. | |||
func (e *encoding) encode(x uint64) string { | |||
b := [12]byte{} | |||
for i := 0; i < 12; i++ { | |||
b[11-i] = e.enc[x&0x1f] | |||
x >>= 5 | |||
} | |||
return string(b[:]) | |||
} | |||
// Base32Encoding with the Geohash alphabet. | |||
// base32encoding with the Geohash alphabet. | |||
var base32encoding = newEncoding("0123456789bcdefghjkmnpqrstuvwxyz") | |||
// BoundingBox returns the region encoded by the given string geohash. | |||
func geoBoundingBox(hash string) geoBox { | |||
bits := uint(5 * len(hash)) | |||
inthash := base32encoding.decode(hash) | |||
return geoBoundingBoxIntWithPrecision(inthash, bits) | |||
} | |||
// geoBox represents a rectangle in latitude/longitude space, stored as the | |||
// minimum and maximum value on each axis. | |||
type geoBox struct { | |||
minLat float64 | |||
maxLat float64 | |||
minLng float64 | |||
maxLng float64 | |||
} | |||
// Round returns a point inside the box, making an effort to round to minimal | |||
// precision. | |||
func (b geoBox) round() (lat, lng float64) { | |||
x := maxDecimalPower(b.maxLat - b.minLat) | |||
lat = math.Ceil(b.minLat/x) * x | |||
x = maxDecimalPower(b.maxLng - b.minLng) | |||
lng = math.Ceil(b.minLng/x) * x | |||
return | |||
} | |||
// precalculated for performance | |||
var exp232 = math.Exp2(32) | |||
// errorWithPrecision returns the error range in latitude and longitude for an
// integer geohash with bits of precision. Latitude is assigned bits/2 bits
// (rounded down) and longitude the remainder; the errors are 180 and 360
// divided by two to the power of the respective bit counts.
func errorWithPrecision(bits uint) (latErr, lngErr float64) {
	total := int(bits)
	half := total / 2
	return math.Ldexp(180.0, -half), math.Ldexp(360.0, half-total)
}
// maxDecimalPower returns the largest power of ten that does not exceed r,
// computed as 10 raised to floor(log10(r)). It is used to pick
// minimal-precision coordinates inside a box of width r.
func maxDecimalPower(r float64) float64 {
	exponent := math.Floor(math.Log10(r))
	return math.Pow10(int(exponent))
}
// Encode the position of x within the range -r to +r as a 32-bit integer. | |||
func encodeRange(x, r float64) uint32 { | |||
p := (x + r) / (2 * r) | |||
return uint32(p * exp232) | |||
} | |||
// Decode the 32-bit range encoding X back to a value in the range -r to +r. | |||
func decodeRange(X uint32, r float64) float64 { | |||
p := float64(X) / exp232 | |||
x := 2*r*p - r | |||
return x | |||
} | |||
// squash compacts the even-numbered bits of X (bit 0, 2, 4, ...) into a
// 32-bit word, so that bit 2k of X becomes bit k of the result. The
// odd-numbered bits of X are masked off first and do not affect the result.
func squash(X uint64) uint32 {
	// Each step merges neighbouring groups, doubling the group width.
	folds := [...]struct {
		shift uint
		mask  uint64
	}{
		{1, 0x3333333333333333},
		{2, 0x0f0f0f0f0f0f0f0f},
		{4, 0x00ff00ff00ff00ff},
		{8, 0x0000ffff0000ffff},
		{16, 0x00000000ffffffff},
	}

	X &= 0x5555555555555555
	for _, f := range folds {
		X = (X | (X >> f.shift)) & f.mask
	}
	return uint32(X)
}
var masks = []uint64{16, 8, 4, 2, 1} | |||
// DecodeGeoHash decodes the string geohash faster with | |||
// higher precision. This api is in experimental phase. | |||
func DecodeGeoHash(geoHash string) (float64, float64) { | |||
even := true | |||
lat := []float64{-90.0, 90.0} | |||
lon := []float64{-180.0, 180.0} | |||
for i := 0; i < len(geoHash); i++ { | |||
cd := uint64(base32encoding.dec[geoHash[i]]) | |||
for j := 0; j < 5; j++ { | |||
if even { | |||
if cd&masks[j] > 0 { | |||
lon[0] = (lon[0] + lon[1]) / 2 | |||
} else { | |||
lon[1] = (lon[0] + lon[1]) / 2 | |||
} | |||
} else { | |||
if cd&masks[j] > 0 { | |||
lat[0] = (lat[0] + lat[1]) / 2 | |||
} else { | |||
lat[1] = (lat[0] + lat[1]) / 2 | |||
} | |||
} | |||
even = !even | |||
} | |||
} | |||
// Deinterleave the bits of X into 32-bit words containing the even and odd | |||
// bitlevels of X, respectively. | |||
func deinterleave(X uint64) (uint32, uint32) { | |||
return squash(X), squash(X >> 1) | |||
return (lat[0] + lat[1]) / 2, (lon[0] + lon[1]) / 2 | |||
} | |||
// BoundingBoxIntWithPrecision returns the region encoded by the integer | |||
// geohash with the specified precision. | |||
func geoBoundingBoxIntWithPrecision(hash uint64, bits uint) geoBox { | |||
fullHash := hash << (64 - bits) | |||
latInt, lngInt := deinterleave(fullHash) | |||
lat := decodeRange(latInt, 90) | |||
lng := decodeRange(lngInt, 180) | |||
latErr, lngErr := errorWithPrecision(bits) | |||
return geoBox{ | |||
minLat: lat, | |||
maxLat: lat + latErr, | |||
minLng: lng, | |||
maxLng: lng + lngErr, | |||
func EncodeGeoHash(lat, lon float64) string { | |||
even := true | |||
lats := []float64{-90.0, 90.0} | |||
lons := []float64{-180.0, 180.0} | |||
precision := 12 | |||
var ch, bit uint64 | |||
var geoHash string | |||
for len(geoHash) < precision { | |||
if even { | |||
mid := (lons[0] + lons[1]) / 2 | |||
if lon > mid { | |||
ch |= masks[bit] | |||
lons[0] = mid | |||
} else { | |||
lons[1] = mid | |||
} | |||
} else { | |||
mid := (lats[0] + lats[1]) / 2 | |||
if lat > mid { | |||
ch |= masks[bit] | |||
lats[0] = mid | |||
} else { | |||
lats[1] = mid | |||
} | |||
} | |||
even = !even | |||
if bit < 4 { | |||
bit++ | |||
} else { | |||
geoHash += string(base32encoding.enc[ch]) | |||
ch = 0 | |||
bit = 0 | |||
} | |||
} | |||
} | |||
// ---------------------------------------------------------------------- | |||
// Decode the string geohash to a (lat, lng) point. | |||
func GeoHashDecode(hash string) (lat, lng float64) { | |||
box := geoBoundingBox(hash) | |||
return box.round() | |||
return geoHash | |||
} |
@@ -85,7 +85,7 @@ func ExtractGeoPoint(thing interface{}) (lon, lat float64, success bool) { | |||
} | |||
} else { | |||
// geohash | |||
lat, lon = GeoHashDecode(geoStr) | |||
lat, lon = DecodeGeoHash(geoStr) | |||
foundLat = true | |||
foundLon = true | |||
} |
@@ -117,6 +117,8 @@ func (b *Batch) String() string { | |||
// be re-used in the future. | |||
func (b *Batch) Reset() { | |||
b.internal.Reset() | |||
b.lastDocSize = 0 | |||
b.totalSize = 0 | |||
} | |||
func (b *Batch) Merge(o *Batch) { |
@@ -121,6 +121,10 @@ type IndexReaderOnly interface { | |||
FieldDictOnly(field string, onlyTerms [][]byte, includeCount bool) (FieldDict, error) | |||
} | |||
// IndexReaderContains is implemented by index readers that can return a | |||
// FieldDictContains for a named field. | |||
type IndexReaderContains interface { | |||
FieldDictContains(field string) (FieldDictContains, error) | |||
} | |||
// FieldTerms contains the terms used by a document, keyed by field | |||
type FieldTerms map[string][]string | |||
@@ -230,6 +234,10 @@ type FieldDict interface { | |||
Close() error | |||
} | |||
// FieldDictContains exposes a single Contains lookup that takes a key and | |||
// returns a bool and an error. | |||
// NOTE(review): presumably reports whether key is a term in the field's | |||
// dictionary — confirm against the implementations. | |||
type FieldDictContains interface { | |||
Contains(key []byte) (bool, error) | |||
} | |||
// DocIDReader is the interface exposing enumeration of documents identifiers. | |||
// Close the reader to release associated resources. | |||
type DocIDReader interface { |
@@ -376,6 +376,7 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { | |||
fileSegments++ | |||
} | |||
} | |||
} | |||
// before the newMerge introduction, need to clean the newly | |||
@@ -392,7 +393,6 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { | |||
} | |||
} | |||
} | |||
// In case where all the docs in the newly merged segment getting | |||
// deleted by the time we reach here, can skip the introduction. | |||
if nextMerge.new != nil && | |||
@@ -424,7 +424,6 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) { | |||
newSnapshot.AddRef() // 1 ref for the nextMerge.notify response | |||
newSnapshot.updateSize() | |||
s.rootLock.Lock() | |||
// swap in new index snapshot | |||
newSnapshot.epoch = s.nextSnapshotEpoch | |||
@@ -502,7 +501,6 @@ func (s *Scorch) revertToSnapshot(revertTo *snapshotReversion) error { | |||
} | |||
newSnapshot.updateSize() | |||
// swap in new snapshot | |||
rootPrev := s.root | |||
s.root = newSnapshot |
@@ -18,6 +18,7 @@ import ( | |||
"encoding/json" | |||
"fmt" | |||
"os" | |||
"strings" | |||
"sync/atomic" | |||
"time" | |||
@@ -151,13 +152,13 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, | |||
atomic.AddUint64(&s.stats.TotFileMergePlanNone, 1) | |||
return nil | |||
} | |||
atomic.AddUint64(&s.stats.TotFileMergePlanOk, 1) | |||
atomic.AddUint64(&s.stats.TotFileMergePlanTasks, uint64(len(resultMergePlan.Tasks))) | |||
// process tasks in serial for now | |||
var notifications []chan *IndexSnapshot | |||
var filenames []string | |||
for _, task := range resultMergePlan.Tasks { | |||
if len(task.Segments) == 0 { | |||
atomic.AddUint64(&s.stats.TotFileMergePlanTasksSegmentsEmpty, 1) | |||
@@ -182,6 +183,12 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, | |||
segmentsToMerge = append(segmentsToMerge, zapSeg) | |||
docsToDrop = append(docsToDrop, segSnapshot.deleted) | |||
} | |||
// track the files getting merged for unsetting the | |||
// removal ineligibility. This helps to unflip files | |||
// even with fast merger, slow persister work flows. | |||
path := zapSeg.Path() | |||
filenames = append(filenames, | |||
strings.TrimPrefix(path, s.path+string(os.PathSeparator))) | |||
} | |||
} | |||
} | |||
@@ -221,6 +228,11 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, | |||
atomic.AddUint64(&s.stats.TotFileMergePlanTasksErr, 1) | |||
return err | |||
} | |||
err = zap.ValidateMerge(segmentsToMerge, nil, docsToDrop, seg.(*zap.Segment)) | |||
if err != nil { | |||
s.unmarkIneligibleForRemoval(filename) | |||
return fmt.Errorf("merge validation failed: %v", err) | |||
} | |||
oldNewDocNums = make(map[uint64][]uint64) | |||
for i, segNewDocNums := range newDocNums { | |||
oldNewDocNums[task.Segments[i].Id()] = segNewDocNums | |||
@@ -263,6 +275,13 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot, | |||
} | |||
} | |||
// once all the newly merged segment introductions are done, | |||
// it's safe to unflip the removal ineligibility for the replaced | |||
// older segments | |||
for _, f := range filenames { | |||
s.unmarkIneligibleForRemoval(f) | |||
} | |||
return nil | |||
} | |||
@@ -311,6 +330,10 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot, | |||
atomic.AddUint64(&s.stats.TotMemMergeErr, 1) | |||
return nil, 0, err | |||
} | |||
err = zap.ValidateMerge(nil, sbs, sbsDrops, seg.(*zap.Segment)) | |||
if err != nil { | |||
return nil, 0, fmt.Errorf("in-memory merge validation failed: %v", err) | |||
} | |||
// update persisted stats | |||
atomic.AddUint64(&s.stats.TotPersistedItems, seg.Count()) |
@@ -90,6 +90,9 @@ func (s *Scorch) persisterLoop() { | |||
var persistWatchers []*epochWatcher | |||
var lastPersistedEpoch, lastMergedEpoch uint64 | |||
var ew *epochWatcher | |||
var unpersistedCallbacks []index.BatchCallback | |||
po, err := s.parsePersisterOptions() | |||
if err != nil { | |||
s.fireAsyncError(fmt.Errorf("persisterOptions json parsing err: %v", err)) | |||
@@ -111,7 +114,6 @@ OUTER: | |||
if ew != nil && ew.epoch > lastMergedEpoch { | |||
lastMergedEpoch = ew.epoch | |||
} | |||
lastMergedEpoch, persistWatchers = s.pausePersisterForMergerCatchUp(lastPersistedEpoch, | |||
lastMergedEpoch, persistWatchers, po) | |||
@@ -150,11 +152,25 @@ OUTER: | |||
_ = ourSnapshot.DecRef() | |||
break OUTER | |||
} | |||
// save this current snapshot's persistedCallbacks, to invoke during | |||
// the retry attempt | |||
unpersistedCallbacks = append(unpersistedCallbacks, ourPersistedCallbacks...) | |||
s.fireAsyncError(fmt.Errorf("got err persisting snapshot: %v", err)) | |||
_ = ourSnapshot.DecRef() | |||
atomic.AddUint64(&s.stats.TotPersistLoopErr, 1) | |||
continue OUTER | |||
} | |||
if unpersistedCallbacks != nil { | |||
// in the event of this being a retry attempt for persisting a snapshot | |||
// that had earlier failed, prepend the persistedCallbacks associated | |||
// with earlier segment(s) to the latest persistedCallbacks | |||
ourPersistedCallbacks = append(unpersistedCallbacks, ourPersistedCallbacks...) | |||
unpersistedCallbacks = nil | |||
} | |||
for i := range ourPersistedCallbacks { | |||
ourPersistedCallbacks[i](err) | |||
} | |||
@@ -179,7 +195,6 @@ OUTER: | |||
s.fireEvent(EventKindPersisterProgress, time.Since(startTime)) | |||
if changed { | |||
s.removeOldData() | |||
atomic.AddUint64(&s.stats.TotPersistLoopProgress, 1) | |||
continue OUTER | |||
} | |||
@@ -230,20 +245,19 @@ func notifyMergeWatchers(lastPersistedEpoch uint64, | |||
return watchersNext | |||
} | |||
func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64, lastMergedEpoch uint64, | |||
persistWatchers []*epochWatcher, po *persisterOptions) (uint64, []*epochWatcher) { | |||
func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64, | |||
lastMergedEpoch uint64, persistWatchers []*epochWatcher, | |||
po *persisterOptions) (uint64, []*epochWatcher) { | |||
// first, let the watchers proceed if they lag behind | |||
// First, let the watchers proceed if they lag behind | |||
persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers) | |||
// check the merger lag by counting the segment files on disk, | |||
// Check the merger lag by counting the segment files on disk, | |||
numFilesOnDisk, _ := s.diskFileStats() | |||
// On finding fewer files on disk, persister takes a short pause | |||
// for sufficient in-memory segments to pile up for the next | |||
// memory merge cum persist loop. | |||
// On finding too many files on disk, persister pause until the merger | |||
// catches up to reduce the segment file count under the threshold. | |||
// But if there is memory pressure, then skip this sleep maneuvers. | |||
numFilesOnDisk, _ := s.diskFileStats() | |||
if numFilesOnDisk < uint64(po.PersisterNapUnderNumFiles) && | |||
po.PersisterNapTimeMSec > 0 && s.paused() == 0 { | |||
select { | |||
@@ -261,6 +275,17 @@ func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64, lastM | |||
return lastMergedEpoch, persistWatchers | |||
} | |||
// Finding too many files on disk could be due to two reasons. | |||
// 1. Too many older snapshots awaiting the clean up. | |||
// 2. The merger could be lagging behind on merging the disk files. | |||
if numFilesOnDisk > uint64(po.PersisterNapUnderNumFiles) { | |||
s.removeOldData() | |||
numFilesOnDisk, _ = s.diskFileStats() | |||
} | |||
// The persister pauses until the merger catches up and reduces the segment | |||
// file count to below the threshold. | |||
// But if there is memory pressure, then skip these sleep maneuvers. | |||
OUTER: | |||
for po.PersisterNapUnderNumFiles > 0 && | |||
numFilesOnDisk >= uint64(po.PersisterNapUnderNumFiles) && | |||
@@ -661,13 +686,13 @@ func (s *Scorch) LoadSnapshot(epoch uint64) (rv *IndexSnapshot, err error) { | |||
} | |||
func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) { | |||
rv := &IndexSnapshot{ | |||
parent: s, | |||
internal: make(map[string][]byte), | |||
refs: 1, | |||
creator: "loadSnapshot", | |||
} | |||
var running uint64 | |||
c := snapshot.Cursor() | |||
for k, _ := c.First(); k != nil; k, _ = c.Next() { | |||
@@ -703,7 +728,6 @@ func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) { | |||
running += segmentSnapshot.segment.Count() | |||
} | |||
} | |||
return rv, nil | |||
} | |||
@@ -750,12 +774,11 @@ func (s *Scorch) removeOldData() { | |||
if err != nil { | |||
s.fireAsyncError(fmt.Errorf("got err removing old bolt snapshots: %v", err)) | |||
} | |||
atomic.AddUint64(&s.stats.TotSnapshotsRemovedFromMetaStore, uint64(removed)) | |||
if removed > 0 { | |||
err = s.removeOldZapFiles() | |||
if err != nil { | |||
s.fireAsyncError(fmt.Errorf("got err removing old zap files: %v", err)) | |||
} | |||
err = s.removeOldZapFiles() | |||
if err != nil { | |||
s.fireAsyncError(fmt.Errorf("got err removing old zap files: %v", err)) | |||
} | |||
} | |||
@@ -41,12 +41,14 @@ const Version uint8 = 2 | |||
var ErrClosed = fmt.Errorf("scorch closed") | |||
type Scorch struct { | |||
nextSegmentID uint64 | |||
stats Stats | |||
iStats internalStats | |||
readOnly bool | |||
version uint8 | |||
config map[string]interface{} | |||
analysisQueue *index.AnalysisQueue | |||
stats Stats | |||
nextSegmentID uint64 | |||
path string | |||
unsafeBatch bool | |||
@@ -73,8 +75,6 @@ type Scorch struct { | |||
onEvent func(event Event) | |||
onAsyncError func(err error) | |||
iStats internalStats | |||
pauseLock sync.RWMutex | |||
pauseCount uint64 | |||
@@ -312,7 +312,7 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) { | |||
// FIXME could sort ids list concurrent with analysis? | |||
if len(batch.IndexOps) > 0 { | |||
if numUpdates > 0 { | |||
go func() { | |||
for _, doc := range batch.IndexOps { | |||
if doc != nil { | |||
@@ -490,6 +490,9 @@ func (s *Scorch) StatsMap() map[string]interface{} { | |||
m["CurOnDiskBytes"] = numBytesUsedDisk | |||
m["CurOnDiskFiles"] = numFilesOnDisk | |||
s.rootLock.RLock() | |||
m["CurFilesIneligibleForRemoval"] = uint64(len(s.ineligibleForRemoval)) | |||
s.rootLock.RUnlock() | |||
// TODO: consider one day removing these backwards compatible | |||
// names for apps using the old names | |||
m["updates"] = m["TotUpdates"] |
@@ -91,12 +91,20 @@ func (e *EmptyDictionary) OnlyIterator(onlyTerms [][]byte, | |||
return &EmptyDictionaryIterator{} | |||
} | |||
func (e *EmptyDictionary) Contains(key []byte) (bool, error) { | |||
return false, nil | |||
} | |||
type EmptyDictionaryIterator struct{} | |||
func (e *EmptyDictionaryIterator) Next() (*index.DictEntry, error) { | |||
return nil, nil | |||
} | |||
func (e *EmptyDictionaryIterator) Contains(key []byte) (bool, error) { | |||
return false, nil | |||
} | |||
func (e *EmptyPostingsIterator) Advance(uint64) (Posting, error) { | |||
return nil, nil | |||
} |
@@ -19,7 +19,10 @@ | |||
package segment | |||
import "fmt" | |||
import ( | |||
"errors" | |||
"fmt" | |||
) | |||
const ( | |||
MaxVarintSize = 9 | |||
@@ -92,3 +95,82 @@ func DecodeUvarintAscending(b []byte) ([]byte, uint64, error) { | |||
} | |||
return b[length:], v, nil | |||
} | |||
// ------------------------------------------------------------ | |||
// MemUvarintReader decodes uvarint-encoded integers straight out of the
// in-memory byte slice S, using C as the index of the next unread byte.
type MemUvarintReader struct {
	C int // index of next byte to read from S
	S []byte
}

// NewMemUvarintReader returns a reader positioned at the start of s.
func NewMemUvarintReader(s []byte) *MemUvarintReader {
	return &MemUvarintReader{S: s}
}

// Len returns the number of unread bytes. It never returns a negative
// value, even when the read position has been moved past the end of S.
func (r *MemUvarintReader) Len() int {
	n := len(r.S) - r.C
	if n < 0 {
		return 0
	}
	return n
}

// ErrMemUvarintReaderOverflow is returned by ReadUvarint when the encoded
// value does not fit in a uint64.
var ErrMemUvarintReaderOverflow = errors.New("MemUvarintReader overflow")

// ErrMemUvarintReaderEOF is returned by ReadUvarint when the buffer ends
// before a complete uvarint has been read.
var ErrMemUvarintReaderEOF = errors.New("MemUvarintReader unexpected end of buffer")

// ReadUvarint reads an encoded uint64. The original code this was
// based on is at encoding/binary/ReadUvarint().
//
// It returns ErrMemUvarintReaderOverflow when the encoding exceeds 64 bits
// and ErrMemUvarintReaderEOF when the read position is outside the buffer
// or the buffer ends in the middle of a value; in the EOF case the read
// position is left unchanged.
func (r *MemUvarintReader) ReadUvarint() (uint64, error) {
	var x uint64
	var s uint
	var C = r.C
	var S = r.S

	// A negative position can only come from a bad SkipBytes count;
	// report it instead of panicking on the index expression below.
	if C < 0 {
		return 0, ErrMemUvarintReaderEOF
	}

	for C < len(S) {
		b := S[C]
		C++

		if b < 0x80 {
			r.C = C

			// why 63? The original code had an 'i += 1' loop var and
			// checked for i > 9 || i == 9 ...; but, we no longer
			// check for the i var, but instead check here for s,
			// which is incremented by 7. So, 7*9 == 63.
			//
			// why the "extra" >= check? The normal case is that s <
			// 63, so we check this single >= guard first so that we
			// hit the normal, nil-error return pathway sooner.
			if s >= 63 && (s > 63 || s == 63 && b > 1) {
				return 0, ErrMemUvarintReaderOverflow
			}

			return x | uint64(b)<<s, nil
		}

		x |= uint64(b&0x7f) << s
		s += 7
	}

	// Ran off the end of the buffer without seeing a terminating byte.
	return 0, ErrMemUvarintReaderEOF
}

// SkipUvarint skips ahead one encoded uint64. If the buffer ends before the
// terminating byte is found, the read position stops at the end of the
// buffer; a position already outside the buffer is left untouched.
func (r *MemUvarintReader) SkipUvarint() {
	C, S := r.C, r.S
	if C < 0 {
		return
	}

	for C < len(S) {
		b := S[C]
		C++

		if b < 0x80 {
			break
		}
	}

	r.C = C
}

// SkipBytes skips a count number of bytes. The resulting position is not
// clamped to the buffer: once it is past the end, Len reports 0 and
// ReadUvarint returns ErrMemUvarintReaderEOF.
func (r *MemUvarintReader) SkipBytes(count int) {
	r.C = r.C + count
}

// Reset points the reader at s and rewinds the read position to its start.
func (r *MemUvarintReader) Reset(s []byte) {
	r.C = 0
	r.S = s
}
@@ -55,7 +55,7 @@ func LiteralPrefix(s *syntax.Regexp) string { | |||
s = s.Sub[0] | |||
} | |||
if s.Op == syntax.OpLiteral { | |||
if s.Op == syntax.OpLiteral && (s.Flags&syntax.FoldCase == 0) { | |||
return string(s.Rune) | |||
} | |||
@@ -59,6 +59,8 @@ type TermDictionary interface { | |||
AutomatonIterator(a vellum.Automaton, | |||
startKeyInclusive, endKeyExclusive []byte) DictionaryIterator | |||
OnlyIterator(onlyTerms [][]byte, includeCount bool) DictionaryIterator | |||
Contains(key []byte) (bool, error) | |||
} | |||
type DictionaryIterator interface { |
@@ -16,6 +16,7 @@ package zap | |||
import ( | |||
"bufio" | |||
"github.com/couchbase/vellum" | |||
"math" | |||
"os" | |||
) | |||
@@ -137,6 +138,7 @@ func InitSegmentBase(mem []byte, memCRC uint32, chunkFactor uint32, | |||
docValueOffset: docValueOffset, | |||
dictLocs: dictLocs, | |||
fieldDvReaders: make(map[uint16]*docValueReader), | |||
fieldFSTs: make(map[uint16]*vellum.FST), | |||
} | |||
sb.updateSize() | |||
@@ -95,6 +95,10 @@ func (d *Dictionary) postingsListInit(rv *PostingsList, except *roaring.Bitmap) | |||
return rv | |||
} | |||
func (d *Dictionary) Contains(key []byte) (bool, error) { | |||
return d.fst.Contains(key) | |||
} | |||
// Iterator returns an iterator for this dictionary | |||
func (d *Dictionary) Iterator() segment.DictionaryIterator { | |||
rv := &DictionaryIterator{ | |||
@@ -143,11 +147,14 @@ func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator | |||
} | |||
// need to increment the end position to be inclusive | |||
endBytes := []byte(end) | |||
if endBytes[len(endBytes)-1] < 0xff { | |||
endBytes[len(endBytes)-1]++ | |||
} else { | |||
endBytes = append(endBytes, 0xff) | |||
var endBytes []byte | |||
if len(end) > 0 { | |||
endBytes = []byte(end) | |||
if endBytes[len(endBytes)-1] < 0xff { | |||
endBytes[len(endBytes)-1]++ | |||
} else { | |||
endBytes = append(endBytes, 0xff) | |||
} | |||
} | |||
if d.fst != nil { |
@@ -39,7 +39,7 @@ type docNumTermsVisitor func(docNum uint64, terms []byte) error | |||
type docVisitState struct { | |||
dvrs map[uint16]*docValueReader | |||
segment *Segment | |||
segment *SegmentBase | |||
} | |||
type docValueReader struct { | |||
@@ -88,8 +88,8 @@ func (s *SegmentBase) loadFieldDocValueReader(field string, | |||
fieldDvLocStart, fieldDvLocEnd uint64) (*docValueReader, error) { | |||
// get the docValue offset for the given fields | |||
if fieldDvLocStart == fieldNotUninverted { | |||
return nil, fmt.Errorf("loadFieldDocValueReader: "+ | |||
"no docValues found for field: %s", field) | |||
// no docValues found, nothing to do | |||
return nil, nil | |||
} | |||
// read the number of chunks, and chunk offsets position | |||
@@ -101,6 +101,8 @@ func (s *SegmentBase) loadFieldDocValueReader(field string, | |||
chunkOffsetsLen := binary.BigEndian.Uint64(s.mem[fieldDvLocEnd-16 : fieldDvLocEnd-8]) | |||
// acquire position of chunk offsets | |||
chunkOffsetsPosition = (fieldDvLocEnd - 16) - chunkOffsetsLen | |||
} else { | |||
return nil, fmt.Errorf("loadFieldDocValueReader: fieldDvLoc too small: %d-%d", fieldDvLocEnd, fieldDvLocStart) | |||
} | |||
fdvIter := &docValueReader{ | |||
@@ -250,7 +252,7 @@ func (di *docValueReader) getDocValueLocs(docNum uint64) (uint64, uint64) { | |||
// VisitDocumentFieldTerms is an implementation of the | |||
// DocumentFieldTermVisitable interface | |||
func (s *Segment) VisitDocumentFieldTerms(localDocNum uint64, fields []string, | |||
func (s *SegmentBase) VisitDocumentFieldTerms(localDocNum uint64, fields []string, | |||
visitor index.DocumentFieldTermVisitor, dvsIn segment.DocVisitState) ( | |||
segment.DocVisitState, error) { | |||
dvs, ok := dvsIn.(*docVisitState) | |||
@@ -289,7 +291,7 @@ func (s *Segment) VisitDocumentFieldTerms(localDocNum uint64, fields []string, | |||
if dvr, ok = dvs.dvrs[fieldID]; ok && dvr != nil { | |||
// check if the chunk is already loaded | |||
if docInChunk != dvr.curChunkNumber() { | |||
err := dvr.loadDvChunk(docInChunk, &s.SegmentBase) | |||
err := dvr.loadDvChunk(docInChunk, s) | |||
if err != nil { | |||
return dvs, err | |||
} | |||
@@ -304,6 +306,6 @@ func (s *Segment) VisitDocumentFieldTerms(localDocNum uint64, fields []string, | |||
// VisitableDocValueFields returns the list of fields with | |||
// persisted doc value terms ready to be visitable using the | |||
// VisitDocumentFieldTerms method. | |||
func (s *Segment) VisitableDocValueFields() ([]string, error) { | |||
func (s *SegmentBase) VisitableDocValueFields() ([]string, error) { | |||
return s.fieldDvNames, nil | |||
} |
@@ -31,6 +31,14 @@ import ( | |||
var DefaultFileMergerBufferSize = 1024 * 1024 | |||
// ValidateMerge can be set by applications to perform additional checks | |||
// on a new segment produced by a merge, by default this does nothing. | |||
// Caller should provide EITHER segments or memSegments, but not both. | |||
// This API is experimental and may be removed at any time. | |||
var ValidateMerge = func(segments []*Segment, memSegments []*SegmentBase, drops []*roaring.Bitmap, newSegment *Segment) error { | |||
return nil | |||
} | |||
const docDropped = math.MaxUint64 // sentinel docNum to represent a deleted doc | |||
// Merge takes a slice of zap segments and bit masks describing which |
@@ -33,6 +33,14 @@ var NewSegmentBufferNumResultsBump int = 100 | |||
var NewSegmentBufferNumResultsFactor float64 = 1.0 | |||
var NewSegmentBufferAvgBytesPerDocFactor float64 = 1.0 | |||
// ValidateDocFields can be set by applications to perform additional checks | |||
// on fields in a document being added to a new segment, by default it does | |||
// nothing. | |||
// This API is experimental and may be removed at any time. | |||
var ValidateDocFields = func(field document.Field) error { | |||
return nil | |||
} | |||
// AnalysisResultsToSegmentBase produces an in-memory zap-encoded | |||
// SegmentBase from analysis results | |||
func AnalysisResultsToSegmentBase(results []*index.AnalysisResult, | |||
@@ -521,6 +529,11 @@ func (s *interim) writeStoredFields() ( | |||
if opts.IncludeDocValues() { | |||
s.IncludeDocValues[fieldID] = true | |||
} | |||
err := ValidateDocFields(field) | |||
if err != nil { | |||
return 0, err | |||
} | |||
} | |||
var curr int |
@@ -15,10 +15,8 @@ | |||
package zap | |||
import ( | |||
"bytes" | |||
"encoding/binary" | |||
"fmt" | |||
"io" | |||
"math" | |||
"reflect" | |||
@@ -192,7 +190,7 @@ func (p *PostingsList) iterator(includeFreq, includeNorm, includeLocs bool, | |||
} | |||
rv.postings = p | |||
rv.includeFreqNorm = includeFreq || includeNorm | |||
rv.includeFreqNorm = includeFreq || includeNorm || includeLocs | |||
rv.includeLocs = includeLocs | |||
if p.normBits1Hit != 0 { | |||
@@ -264,18 +262,17 @@ func (p *PostingsList) iterator(includeFreq, includeNorm, includeLocs bool, | |||
// Count returns the number of items on this postings list | |||
func (p *PostingsList) Count() uint64 { | |||
var n uint64 | |||
var n, e uint64 | |||
if p.normBits1Hit != 0 { | |||
n = 1 | |||
if p.except != nil && p.except.Contains(uint32(p.docNum1Hit)) { | |||
e = 1 | |||
} | |||
} else if p.postings != nil { | |||
n = p.postings.GetCardinality() | |||
} | |||
var e uint64 | |||
if p.except != nil { | |||
e = p.except.GetCardinality() | |||
} | |||
if n <= e { | |||
return 0 | |||
if p.except != nil { | |||
e = p.postings.AndCardinality(p.except) | |||
} | |||
} | |||
return n - e | |||
} | |||
@@ -327,16 +324,16 @@ func (rv *PostingsList) init1Hit(fstVal uint64) error { | |||
// PostingsIterator provides a way to iterate through the postings list | |||
type PostingsIterator struct { | |||
postings *PostingsList | |||
all roaring.IntIterable | |||
Actual roaring.IntIterable | |||
all roaring.IntPeekable | |||
Actual roaring.IntPeekable | |||
ActualBM *roaring.Bitmap | |||
currChunk uint32 | |||
currChunkFreqNorm []byte | |||
currChunkLoc []byte | |||
freqNormReader *bytes.Reader | |||
locReader *bytes.Reader | |||
freqNormReader *segment.MemUvarintReader | |||
locReader *segment.MemUvarintReader | |||
freqChunkOffsets []uint64 | |||
freqChunkStart uint64 | |||
@@ -387,7 +384,7 @@ func (i *PostingsIterator) loadChunk(chunk int) error { | |||
end += e | |||
i.currChunkFreqNorm = i.postings.sb.mem[start:end] | |||
if i.freqNormReader == nil { | |||
i.freqNormReader = bytes.NewReader(i.currChunkFreqNorm) | |||
i.freqNormReader = segment.NewMemUvarintReader(i.currChunkFreqNorm) | |||
} else { | |||
i.freqNormReader.Reset(i.currChunkFreqNorm) | |||
} | |||
@@ -405,7 +402,7 @@ func (i *PostingsIterator) loadChunk(chunk int) error { | |||
end += e | |||
i.currChunkLoc = i.postings.sb.mem[start:end] | |||
if i.locReader == nil { | |||
i.locReader = bytes.NewReader(i.currChunkLoc) | |||
i.locReader = segment.NewMemUvarintReader(i.currChunkLoc) | |||
} else { | |||
i.locReader.Reset(i.currChunkLoc) | |||
} | |||
@@ -420,18 +417,34 @@ func (i *PostingsIterator) readFreqNormHasLocs() (uint64, uint64, bool, error) { | |||
return 1, i.normBits1Hit, false, nil | |||
} | |||
freqHasLocs, err := binary.ReadUvarint(i.freqNormReader) | |||
freqHasLocs, err := i.freqNormReader.ReadUvarint() | |||
if err != nil { | |||
return 0, 0, false, fmt.Errorf("error reading frequency: %v", err) | |||
} | |||
freq, hasLocs := decodeFreqHasLocs(freqHasLocs) | |||
normBits, err := binary.ReadUvarint(i.freqNormReader) | |||
normBits, err := i.freqNormReader.ReadUvarint() | |||
if err != nil { | |||
return 0, 0, false, fmt.Errorf("error reading norm: %v", err) | |||
} | |||
return freq, normBits, hasLocs, err | |||
return freq, normBits, hasLocs, nil | |||
} | |||
func (i *PostingsIterator) skipFreqNormReadHasLocs() (bool, error) { | |||
if i.normBits1Hit != 0 { | |||
return false, nil | |||
} | |||
freqHasLocs, err := i.freqNormReader.ReadUvarint() | |||
if err != nil { | |||
return false, fmt.Errorf("error reading freqHasLocs: %v", err) | |||
} | |||
i.freqNormReader.SkipUvarint() // Skip normBits. | |||
return freqHasLocs&0x01 != 0, nil // See decodeFreqHasLocs() / hasLocs. | |||
} | |||
func encodeFreqHasLocs(freq uint64, hasLocs bool) uint64 { | |||
@@ -449,58 +462,53 @@ func decodeFreqHasLocs(freqHasLocs uint64) (uint64, bool) { | |||
} | |||
// readLocation processes all the integers on the stream representing a single | |||
// location. if you care about it, pass in a non-nil location struct, and we | |||
// will fill it. if you don't care about it, pass in nil and we safely consume | |||
// the contents. | |||
// location. | |||
func (i *PostingsIterator) readLocation(l *Location) error { | |||
// read off field | |||
fieldID, err := binary.ReadUvarint(i.locReader) | |||
fieldID, err := i.locReader.ReadUvarint() | |||
if err != nil { | |||
return fmt.Errorf("error reading location field: %v", err) | |||
} | |||
// read off pos | |||
pos, err := binary.ReadUvarint(i.locReader) | |||
pos, err := i.locReader.ReadUvarint() | |||
if err != nil { | |||
return fmt.Errorf("error reading location pos: %v", err) | |||
} | |||
// read off start | |||
start, err := binary.ReadUvarint(i.locReader) | |||
start, err := i.locReader.ReadUvarint() | |||
if err != nil { | |||
return fmt.Errorf("error reading location start: %v", err) | |||
} | |||
// read off end | |||
end, err := binary.ReadUvarint(i.locReader) | |||
end, err := i.locReader.ReadUvarint() | |||
if err != nil { | |||
return fmt.Errorf("error reading location end: %v", err) | |||
} | |||
// read off num array pos | |||
numArrayPos, err := binary.ReadUvarint(i.locReader) | |||
numArrayPos, err := i.locReader.ReadUvarint() | |||
if err != nil { | |||
return fmt.Errorf("error reading location num array pos: %v", err) | |||
} | |||
// group these together for less branching | |||
if l != nil { | |||
l.field = i.postings.sb.fieldsInv[fieldID] | |||
l.pos = pos | |||
l.start = start | |||
l.end = end | |||
if cap(l.ap) < int(numArrayPos) { | |||
l.ap = make([]uint64, int(numArrayPos)) | |||
} else { | |||
l.ap = l.ap[:int(numArrayPos)] | |||
} | |||
l.field = i.postings.sb.fieldsInv[fieldID] | |||
l.pos = pos | |||
l.start = start | |||
l.end = end | |||
if cap(l.ap) < int(numArrayPos) { | |||
l.ap = make([]uint64, int(numArrayPos)) | |||
} else { | |||
l.ap = l.ap[:int(numArrayPos)] | |||
} | |||
// read off array positions | |||
for k := 0; k < int(numArrayPos); k++ { | |||
ap, err := binary.ReadUvarint(i.locReader) | |||
ap, err := i.locReader.ReadUvarint() | |||
if err != nil { | |||
return fmt.Errorf("error reading array position: %v", err) | |||
} | |||
if l != nil { | |||
l.ap[k] = ap | |||
} | |||
l.ap[k] = ap | |||
} | |||
return nil | |||
@@ -557,7 +565,7 @@ func (i *PostingsIterator) nextAtOrAfter(atOrAfter uint64) (segment.Posting, err | |||
} | |||
rv.locs = i.nextSegmentLocs[:0] | |||
numLocsBytes, err := binary.ReadUvarint(i.locReader) | |||
numLocsBytes, err := i.locReader.ReadUvarint() | |||
if err != nil { | |||
return nil, fmt.Errorf("error reading location numLocsBytes: %v", err) | |||
} | |||
@@ -613,17 +621,14 @@ func (i *PostingsIterator) nextBytes() ( | |||
if hasLocs { | |||
startLoc := len(i.currChunkLoc) - i.locReader.Len() | |||
numLocsBytes, err := binary.ReadUvarint(i.locReader) | |||
numLocsBytes, err := i.locReader.ReadUvarint() | |||
if err != nil { | |||
return 0, 0, 0, nil, nil, | |||
fmt.Errorf("error reading location nextBytes numLocs: %v", err) | |||
} | |||
// skip over all the location bytes | |||
_, err = i.locReader.Seek(int64(numLocsBytes), io.SeekCurrent) | |||
if err != nil { | |||
return 0, 0, 0, nil, nil, err | |||
} | |||
i.locReader.SkipBytes(int(numLocsBytes)) | |||
endLoc := len(i.currChunkLoc) - i.locReader.Len() | |||
bytesLoc = i.currChunkLoc[startLoc:endLoc] | |||
@@ -657,14 +662,14 @@ func (i *PostingsIterator) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool, | |||
return i.nextDocNumAtOrAfterClean(atOrAfter) | |||
} | |||
n := i.Actual.Next() | |||
for uint64(n) < atOrAfter && i.Actual.HasNext() { | |||
n = i.Actual.Next() | |||
} | |||
if uint64(n) < atOrAfter { | |||
i.Actual.AdvanceIfNeeded(uint32(atOrAfter)) | |||
if !i.Actual.HasNext() { | |||
// couldn't find anything | |||
return 0, false, nil | |||
} | |||
n := i.Actual.Next() | |||
allN := i.all.Next() | |||
nChunk := n / i.postings.sb.chunkFactor | |||
@@ -701,23 +706,20 @@ func (i *PostingsIterator) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool, | |||
// no deletions) where the all bitmap is the same as the actual bitmap | |||
func (i *PostingsIterator) nextDocNumAtOrAfterClean( | |||
atOrAfter uint64) (uint64, bool, error) { | |||
n := i.Actual.Next() | |||
if !i.includeFreqNorm { | |||
for uint64(n) < atOrAfter && i.Actual.HasNext() { | |||
n = i.Actual.Next() | |||
} | |||
i.Actual.AdvanceIfNeeded(uint32(atOrAfter)) | |||
if uint64(n) < atOrAfter { | |||
if !i.Actual.HasNext() { | |||
return 0, false, nil // couldn't find anything | |||
} | |||
return uint64(n), true, nil | |||
return uint64(i.Actual.Next()), true, nil | |||
} | |||
// freq-norm's needed, so maintain freq-norm chunk reader | |||
sameChunkNexts := 0 // # of times we called Next() in the same chunk | |||
n := i.Actual.Next() | |||
nChunk := n / i.postings.sb.chunkFactor | |||
for uint64(n) < atOrAfter && i.Actual.HasNext() { | |||
@@ -764,22 +766,19 @@ func (i *PostingsIterator) currChunkNext(nChunk uint32) error { | |||
} | |||
// read off freq/offsets even though we don't care about them | |||
_, _, hasLocs, err := i.readFreqNormHasLocs() | |||
hasLocs, err := i.skipFreqNormReadHasLocs() | |||
if err != nil { | |||
return err | |||
} | |||
if i.includeLocs && hasLocs { | |||
numLocsBytes, err := binary.ReadUvarint(i.locReader) | |||
numLocsBytes, err := i.locReader.ReadUvarint() | |||
if err != nil { | |||
return fmt.Errorf("error reading location numLocsBytes: %v", err) | |||
} | |||
// skip over all the location bytes | |||
_, err = i.locReader.Seek(int64(numLocsBytes), io.SeekCurrent) | |||
if err != nil { | |||
return err | |||
} | |||
i.locReader.SkipBytes(int(numLocsBytes)) | |||
} | |||
return nil |
@@ -20,8 +20,8 @@ import ( | |||
"fmt" | |||
"io" | |||
"os" | |||
"reflect" | |||
"sync" | |||
"unsafe" | |||
"github.com/RoaringBitmap/roaring" | |||
"github.com/blevesearch/bleve/index/scorch/segment" | |||
@@ -35,7 +35,7 @@ var reflectStaticSizeSegmentBase int | |||
func init() { | |||
var sb SegmentBase | |||
reflectStaticSizeSegmentBase = int(reflect.TypeOf(sb).Size()) | |||
reflectStaticSizeSegmentBase = int(unsafe.Sizeof(sb)) | |||
} | |||
// Open returns a zap impl of a segment | |||
@@ -56,6 +56,7 @@ func Open(path string) (segment.Segment, error) { | |||
mem: mm[0 : len(mm)-FooterSize], | |||
fieldsMap: make(map[string]uint16), | |||
fieldDvReaders: make(map[uint16]*docValueReader), | |||
fieldFSTs: make(map[uint16]*vellum.FST), | |||
}, | |||
f: f, | |||
mm: mm, | |||
@@ -101,6 +102,9 @@ type SegmentBase struct { | |||
fieldDvReaders map[uint16]*docValueReader // naive chunk cache per field | |||
fieldDvNames []string // field names cached in fieldDvReaders | |||
size uint64 | |||
m sync.Mutex | |||
fieldFSTs map[uint16]*vellum.FST | |||
} | |||
func (sb *SegmentBase) Size() int { | |||
@@ -258,19 +262,27 @@ func (sb *SegmentBase) dictionary(field string) (rv *Dictionary, err error) { | |||
dictStart := sb.dictLocs[rv.fieldID] | |||
if dictStart > 0 { | |||
// read the length of the vellum data | |||
vellumLen, read := binary.Uvarint(sb.mem[dictStart : dictStart+binary.MaxVarintLen64]) | |||
fstBytes := sb.mem[dictStart+uint64(read) : dictStart+uint64(read)+vellumLen] | |||
if fstBytes != nil { | |||
var ok bool | |||
sb.m.Lock() | |||
if rv.fst, ok = sb.fieldFSTs[rv.fieldID]; !ok { | |||
// read the length of the vellum data | |||
vellumLen, read := binary.Uvarint(sb.mem[dictStart : dictStart+binary.MaxVarintLen64]) | |||
fstBytes := sb.mem[dictStart+uint64(read) : dictStart+uint64(read)+vellumLen] | |||
rv.fst, err = vellum.Load(fstBytes) | |||
if err != nil { | |||
sb.m.Unlock() | |||
return nil, fmt.Errorf("dictionary field %s vellum err: %v", field, err) | |||
} | |||
rv.fstReader, err = rv.fst.Reader() | |||
if err != nil { | |||
return nil, fmt.Errorf("dictionary field %s vellum reader err: %v", field, err) | |||
} | |||
sb.fieldFSTs[rv.fieldID] = rv.fst | |||
} | |||
sb.m.Unlock() | |||
rv.fstReader, err = rv.fst.Reader() | |||
if err != nil { | |||
return nil, fmt.Errorf("dictionary field %s vellum reader err: %v", field, err) | |||
} | |||
} | |||
} | |||
@@ -527,7 +539,7 @@ func (s *Segment) DictAddr(field string) (uint64, error) { | |||
} | |||
func (s *SegmentBase) loadDvReaders() error { | |||
if s.docValueOffset == fieldNotUninverted { | |||
if s.docValueOffset == fieldNotUninverted || s.numDocs == 0 { | |||
return nil | |||
} | |||
@@ -546,7 +558,10 @@ func (s *SegmentBase) loadDvReaders() error { | |||
} | |||
read += uint64(n) | |||
fieldDvReader, _ := s.loadFieldDocValueReader(field, fieldLocStart, fieldLocEnd) | |||
fieldDvReader, err := s.loadFieldDocValueReader(field, fieldLocStart, fieldLocEnd) | |||
if err != nil { | |||
return err | |||
} | |||
if fieldDvReader != nil { | |||
s.fieldDvReaders[uint16(fieldID)] = fieldDvReader | |||
s.fieldDvNames = append(s.fieldDvNames, field) |
@@ -28,13 +28,14 @@ import ( | |||
"github.com/blevesearch/bleve/index" | |||
"github.com/blevesearch/bleve/index/scorch/segment" | |||
"github.com/couchbase/vellum" | |||
lev2 "github.com/couchbase/vellum/levenshtein2" | |||
lev "github.com/couchbase/vellum/levenshtein" | |||
) | |||
// re usable, threadsafe levenshtein builders | |||
var lb1, lb2 *lev2.LevenshteinAutomatonBuilder | |||
var lb1, lb2 *lev.LevenshteinAutomatonBuilder | |||
type asynchSegmentResult struct { | |||
dict segment.TermDictionary | |||
dictItr segment.DictionaryIterator | |||
index int | |||
@@ -51,11 +52,11 @@ func init() { | |||
var is interface{} = IndexSnapshot{} | |||
reflectStaticSizeIndexSnapshot = int(reflect.TypeOf(is).Size()) | |||
var err error | |||
lb1, err = lev2.NewLevenshteinAutomatonBuilder(1, true) | |||
lb1, err = lev.NewLevenshteinAutomatonBuilder(1, true) | |||
if err != nil { | |||
panic(fmt.Errorf("Levenshtein automaton ed1 builder err: %v", err)) | |||
} | |||
lb2, err = lev2.NewLevenshteinAutomatonBuilder(2, true) | |||
lb2, err = lev.NewLevenshteinAutomatonBuilder(2, true) | |||
if err != nil { | |||
panic(fmt.Errorf("Levenshtein automaton ed2 builder err: %v", err)) | |||
} | |||
@@ -126,7 +127,9 @@ func (i *IndexSnapshot) updateSize() { | |||
} | |||
} | |||
func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i segment.TermDictionary) segment.DictionaryIterator) (*IndexSnapshotFieldDict, error) { | |||
func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, | |||
makeItr func(i segment.TermDictionary) segment.DictionaryIterator, | |||
randomLookup bool) (*IndexSnapshotFieldDict, error) { | |||
results := make(chan *asynchSegmentResult) | |||
for index, segment := range i.segment { | |||
@@ -135,7 +138,11 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s | |||
if err != nil { | |||
results <- &asynchSegmentResult{err: err} | |||
} else { | |||
results <- &asynchSegmentResult{dictItr: makeItr(dict)} | |||
if randomLookup { | |||
results <- &asynchSegmentResult{dict: dict} | |||
} else { | |||
results <- &asynchSegmentResult{dictItr: makeItr(dict)} | |||
} | |||
} | |||
}(index, segment) | |||
} | |||
@@ -150,14 +157,20 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s | |||
if asr.err != nil && err == nil { | |||
err = asr.err | |||
} else { | |||
next, err2 := asr.dictItr.Next() | |||
if err2 != nil && err == nil { | |||
err = err2 | |||
} | |||
if next != nil { | |||
if !randomLookup { | |||
next, err2 := asr.dictItr.Next() | |||
if err2 != nil && err == nil { | |||
err = err2 | |||
} | |||
if next != nil { | |||
rv.cursors = append(rv.cursors, &segmentDictCursor{ | |||
itr: asr.dictItr, | |||
curr: *next, | |||
}) | |||
} | |||
} else { | |||
rv.cursors = append(rv.cursors, &segmentDictCursor{ | |||
itr: asr.dictItr, | |||
curr: *next, | |||
dict: asr.dict, | |||
}) | |||
} | |||
} | |||
@@ -166,8 +179,11 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s | |||
if err != nil { | |||
return nil, err | |||
} | |||
// prepare heap | |||
heap.Init(rv) | |||
if !randomLookup { | |||
// prepare heap | |||
heap.Init(rv) | |||
} | |||
return rv, nil | |||
} | |||
@@ -175,21 +191,21 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s | |||
func (i *IndexSnapshot) FieldDict(field string) (index.FieldDict, error) { | |||
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { | |||
return i.Iterator() | |||
}) | |||
}, false) | |||
} | |||
func (i *IndexSnapshot) FieldDictRange(field string, startTerm []byte, | |||
endTerm []byte) (index.FieldDict, error) { | |||
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { | |||
return i.RangeIterator(string(startTerm), string(endTerm)) | |||
}) | |||
}, false) | |||
} | |||
func (i *IndexSnapshot) FieldDictPrefix(field string, | |||
termPrefix []byte) (index.FieldDict, error) { | |||
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { | |||
return i.PrefixIterator(string(termPrefix)) | |||
}) | |||
}, false) | |||
} | |||
func (i *IndexSnapshot) FieldDictRegexp(field string, | |||
@@ -204,7 +220,7 @@ func (i *IndexSnapshot) FieldDictRegexp(field string, | |||
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { | |||
return i.AutomatonIterator(a, prefixBeg, prefixEnd) | |||
}) | |||
}, false) | |||
} | |||
func (i *IndexSnapshot) getLevAutomaton(term string, | |||
@@ -232,14 +248,18 @@ func (i *IndexSnapshot) FieldDictFuzzy(field string, | |||
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { | |||
return i.AutomatonIterator(a, prefixBeg, prefixEnd) | |||
}) | |||
}, false) | |||
} | |||
func (i *IndexSnapshot) FieldDictOnly(field string, | |||
onlyTerms [][]byte, includeCount bool) (index.FieldDict, error) { | |||
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { | |||
return i.OnlyIterator(onlyTerms, includeCount) | |||
}) | |||
}, false) | |||
} | |||
func (i *IndexSnapshot) FieldDictContains(field string) (index.FieldDictContains, error) { | |||
return i.newIndexSnapshotFieldDict(field, nil, true) | |||
} | |||
func (i *IndexSnapshot) DocIDReaderAll() (index.DocIDReader, error) { |
@@ -22,6 +22,7 @@ import ( | |||
) | |||
type segmentDictCursor struct { | |||
dict segment.TermDictionary | |||
itr segment.DictionaryIterator | |||
curr index.DictEntry | |||
} | |||
@@ -91,3 +92,17 @@ func (i *IndexSnapshotFieldDict) Next() (*index.DictEntry, error) { | |||
func (i *IndexSnapshotFieldDict) Close() error { | |||
return nil | |||
} | |||
func (i *IndexSnapshotFieldDict) Contains(key []byte) (bool, error) { | |||
if len(i.cursors) == 0 { | |||
return false, nil | |||
} | |||
for _, cursor := range i.cursors { | |||
if found, _ := cursor.dict.Contains(key); found { | |||
return true, nil | |||
} | |||
} | |||
return false, nil | |||
} |
@@ -183,9 +183,9 @@ func (cfd *cachedFieldDocs) prepareField(field string, ss *SegmentSnapshot) { | |||
} | |||
type cachedDocs struct { | |||
size uint64 | |||
m sync.Mutex // As the cache is asynchronously prepared, need a lock | |||
cache map[string]*cachedFieldDocs // Keyed by field | |||
size uint64 | |||
} | |||
func (c *cachedDocs) prepareFields(wantedFields []string, ss *SegmentSnapshot) error { |
@@ -107,6 +107,9 @@ type Stats struct { | |||
TotFileMergeIntroductionsDone uint64 | |||
TotFileMergeIntroductionsSkipped uint64 | |||
CurFilesIneligibleForRemoval uint64 | |||
TotSnapshotsRemovedFromMetaStore uint64 | |||
TotMemMergeBeg uint64 | |||
TotMemMergeErr uint64 | |||
TotMemMergeDone uint64 |
@@ -415,7 +415,6 @@ func (udc *UpsideDownCouch) Close() error { | |||
func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) { | |||
// do analysis before acquiring write lock | |||
analysisStart := time.Now() | |||
numPlainTextBytes := doc.NumPlainTextBytes() | |||
resultChan := make(chan *index.AnalysisResult) | |||
aw := index.NewAnalysisWork(udc, doc, resultChan) | |||
@@ -452,6 +451,11 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) { | |||
return | |||
} | |||
return udc.UpdateWithAnalysis(doc, result, backIndexRow) | |||
} | |||
func (udc *UpsideDownCouch) UpdateWithAnalysis(doc *document.Document, | |||
result *index.AnalysisResult, backIndexRow *BackIndexRow) (err error) { | |||
// start a writer for this update | |||
indexStart := time.Now() | |||
var kvwriter store.KVWriter | |||
@@ -490,7 +494,7 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) { | |||
atomic.AddUint64(&udc.stats.indexTime, uint64(time.Since(indexStart))) | |||
if err == nil { | |||
atomic.AddUint64(&udc.stats.updates, 1) | |||
atomic.AddUint64(&udc.stats.numPlainTextBytesIndexed, numPlainTextBytes) | |||
atomic.AddUint64(&udc.stats.numPlainTextBytesIndexed, doc.NumPlainTextBytes()) | |||
} else { | |||
atomic.AddUint64(&udc.stats.errors, 1) | |||
} | |||
@@ -797,6 +801,10 @@ func (udc *UpsideDownCouch) termFieldVectorsFromTermVectors(in []*TermVector) [] | |||
} | |||
func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) { | |||
persistedCallback := batch.PersistedCallback() | |||
if persistedCallback != nil { | |||
defer persistedCallback(err) | |||
} | |||
analysisStart := time.Now() | |||
resultChan := make(chan *index.AnalysisResult, len(batch.IndexOps)) | |||
@@ -810,7 +818,7 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) { | |||
} | |||
} | |||
if len(batch.IndexOps) > 0 { | |||
if numUpdates > 0 { | |||
go func() { | |||
for _, doc := range batch.IndexOps { | |||
if doc != nil { | |||
@@ -961,10 +969,6 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) { | |||
atomic.AddUint64(&udc.stats.errors, 1) | |||
} | |||
persistedCallback := batch.PersistedCallback() | |||
if persistedCallback != nil { | |||
persistedCallback(err) | |||
} | |||
return | |||
} | |||
@@ -434,6 +434,8 @@ func createChildSearchRequest(req *SearchRequest) *SearchRequest { | |||
Sort: req.Sort.Copy(), | |||
IncludeLocations: req.IncludeLocations, | |||
Score: req.Score, | |||
SearchAfter: req.SearchAfter, | |||
SearchBefore: req.SearchBefore, | |||
} | |||
return &rv | |||
} | |||
@@ -451,6 +453,14 @@ func MultiSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*Se | |||
searchStart := time.Now() | |||
asyncResults := make(chan *asyncSearchResult, len(indexes)) | |||
var reverseQueryExecution bool | |||
if req.SearchBefore != nil { | |||
reverseQueryExecution = true | |||
req.Sort.Reverse() | |||
req.SearchAfter = req.SearchBefore | |||
req.SearchBefore = nil | |||
} | |||
// run search on each index in separate go routine | |||
var waitGroup sync.WaitGroup | |||
@@ -503,7 +513,7 @@ func MultiSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*Se | |||
// sort all hits with the requested order | |||
if len(req.Sort) > 0 { | |||
sorter := newMultiSearchHitSorter(req.Sort, sr.Hits) | |||
sorter := newSearchHitSorter(req.Sort, sr.Hits) | |||
sort.Sort(sorter) | |||
} | |||
@@ -524,6 +534,17 @@ func MultiSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*Se | |||
sr.Facets.Fixup(name, fr.Size) | |||
} | |||
if reverseQueryExecution { | |||
// reverse the sort back to the original | |||
req.Sort.Reverse() | |||
// resort using the original order | |||
mhs := newSearchHitSorter(req.Sort, sr.Hits) | |||
sort.Sort(mhs) | |||
// reset request | |||
req.SearchBefore = req.SearchAfter | |||
req.SearchAfter = nil | |||
} | |||
// fix up original request | |||
sr.Request = req | |||
searchDuration := time.Since(searchStart) | |||
@@ -581,26 +602,3 @@ func (f *indexAliasImplFieldDict) Close() error { | |||
defer f.index.mutex.RUnlock() | |||
return f.fieldDict.Close() | |||
} | |||
type multiSearchHitSorter struct { | |||
hits search.DocumentMatchCollection | |||
sort search.SortOrder | |||
cachedScoring []bool | |||
cachedDesc []bool | |||
} | |||
func newMultiSearchHitSorter(sort search.SortOrder, hits search.DocumentMatchCollection) *multiSearchHitSorter { | |||
return &multiSearchHitSorter{ | |||
sort: sort, | |||
hits: hits, | |||
cachedScoring: sort.CacheIsScore(), | |||
cachedDesc: sort.CacheDescending(), | |||
} | |||
} | |||
func (m *multiSearchHitSorter) Len() int { return len(m.hits) } | |||
func (m *multiSearchHitSorter) Swap(i, j int) { m.hits[i], m.hits[j] = m.hits[j], m.hits[i] } | |||
func (m *multiSearchHitSorter) Less(i, j int) bool { | |||
c := m.sort.Compare(m.cachedScoring, m.cachedDesc, m.hits[i], m.hits[j]) | |||
return c < 0 | |||
} |
@@ -19,6 +19,7 @@ import ( | |||
"encoding/json" | |||
"fmt" | |||
"os" | |||
"sort" | |||
"sync" | |||
"sync/atomic" | |||
"time" | |||
@@ -442,7 +443,20 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr | |||
return nil, ErrorIndexClosed | |||
} | |||
collector := collector.NewTopNCollector(req.Size, req.From, req.Sort) | |||
var reverseQueryExecution bool | |||
if req.SearchBefore != nil { | |||
reverseQueryExecution = true | |||
req.Sort.Reverse() | |||
req.SearchAfter = req.SearchBefore | |||
req.SearchBefore = nil | |||
} | |||
var coll *collector.TopNCollector | |||
if req.SearchAfter != nil { | |||
coll = collector.NewTopNCollectorAfter(req.Size, req.Sort, req.SearchAfter) | |||
} else { | |||
coll = collector.NewTopNCollector(req.Size, req.From, req.Sort) | |||
} | |||
// open a reader for this search | |||
indexReader, err := i.i.Reader() | |||
@@ -494,10 +508,10 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr | |||
facetsBuilder.Add(facetName, facetBuilder) | |||
} | |||
} | |||
collector.SetFacetsBuilder(facetsBuilder) | |||
coll.SetFacetsBuilder(facetsBuilder) | |||
} | |||
memNeeded := memNeededForSearch(req, searcher, collector) | |||
memNeeded := memNeededForSearch(req, searcher, coll) | |||
if cb := ctx.Value(SearchQueryStartCallbackKey); cb != nil { | |||
if cbF, ok := cb.(SearchQueryStartCallbackFn); ok { | |||
err = cbF(memNeeded) | |||
@@ -515,12 +529,12 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr | |||
} | |||
} | |||
err = collector.Collect(ctx, searcher, indexReader) | |||
err = coll.Collect(ctx, searcher, indexReader) | |||
if err != nil { | |||
return nil, err | |||
} | |||
hits := collector.Results() | |||
hits := coll.Results() | |||
var highlighter highlight.Highlighter | |||
@@ -542,71 +556,13 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr | |||
} | |||
for _, hit := range hits { | |||
if len(req.Fields) > 0 || highlighter != nil { | |||
doc, err := indexReader.Document(hit.ID) | |||
if err == nil && doc != nil { | |||
if len(req.Fields) > 0 { | |||
fieldsToLoad := deDuplicate(req.Fields) | |||
for _, f := range fieldsToLoad { | |||
for _, docF := range doc.Fields { | |||
if f == "*" || docF.Name() == f { | |||
var value interface{} | |||
switch docF := docF.(type) { | |||
case *document.TextField: | |||
value = string(docF.Value()) | |||
case *document.NumericField: | |||
num, err := docF.Number() | |||
if err == nil { | |||
value = num | |||
} | |||
case *document.DateTimeField: | |||
datetime, err := docF.DateTime() | |||
if err == nil { | |||
value = datetime.Format(time.RFC3339) | |||
} | |||
case *document.BooleanField: | |||
boolean, err := docF.Boolean() | |||
if err == nil { | |||
value = boolean | |||
} | |||
case *document.GeoPointField: | |||
lon, err := docF.Lon() | |||
if err == nil { | |||
lat, err := docF.Lat() | |||
if err == nil { | |||
value = []float64{lon, lat} | |||
} | |||
} | |||
} | |||
if value != nil { | |||
hit.AddFieldValue(docF.Name(), value) | |||
} | |||
} | |||
} | |||
} | |||
} | |||
if highlighter != nil { | |||
highlightFields := req.Highlight.Fields | |||
if highlightFields == nil { | |||
// add all fields with matches | |||
highlightFields = make([]string, 0, len(hit.Locations)) | |||
for k := range hit.Locations { | |||
highlightFields = append(highlightFields, k) | |||
} | |||
} | |||
for _, hf := range highlightFields { | |||
highlighter.BestFragmentsInField(hit, doc, hf, 1) | |||
} | |||
} | |||
} else if doc == nil { | |||
// unexpected case, a doc ID that was found as a search hit | |||
// was unable to be found during document lookup | |||
return nil, ErrorIndexReadInconsistency | |||
} | |||
} | |||
if i.name != "" { | |||
hit.Index = i.name | |||
} | |||
err = LoadAndHighlightFields(hit, req, i.name, indexReader, highlighter) | |||
if err != nil { | |||
return nil, err | |||
} | |||
} | |||
atomic.AddUint64(&i.stats.searches, 1) | |||
@@ -618,6 +574,17 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr | |||
logger.Printf("slow search took %s - %v", searchDuration, req) | |||
} | |||
if reverseQueryExecution { | |||
// reverse the sort back to the original | |||
req.Sort.Reverse() | |||
// resort using the original order | |||
mhs := newSearchHitSorter(req.Sort, hits) | |||
sort.Sort(mhs) | |||
// reset request | |||
req.SearchBefore = req.SearchAfter | |||
req.SearchAfter = nil | |||
} | |||
return &SearchResult{ | |||
Status: &SearchStatus{ | |||
Total: 1, | |||
@@ -625,13 +592,82 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr | |||
}, | |||
Request: req, | |||
Hits: hits, | |||
Total: collector.Total(), | |||
MaxScore: collector.MaxScore(), | |||
Total: coll.Total(), | |||
MaxScore: coll.MaxScore(), | |||
Took: searchDuration, | |||
Facets: collector.FacetResults(), | |||
Facets: coll.FacetResults(), | |||
}, nil | |||
} | |||
func LoadAndHighlightFields(hit *search.DocumentMatch, req *SearchRequest, | |||
indexName string, r index.IndexReader, | |||
highlighter highlight.Highlighter) error { | |||
if len(req.Fields) > 0 || highlighter != nil { | |||
doc, err := r.Document(hit.ID) | |||
if err == nil && doc != nil { | |||
if len(req.Fields) > 0 { | |||
fieldsToLoad := deDuplicate(req.Fields) | |||
for _, f := range fieldsToLoad { | |||
for _, docF := range doc.Fields { | |||
if f == "*" || docF.Name() == f { | |||
var value interface{} | |||
switch docF := docF.(type) { | |||
case *document.TextField: | |||
value = string(docF.Value()) | |||
case *document.NumericField: | |||
num, err := docF.Number() | |||
if err == nil { | |||
value = num | |||
} | |||
case *document.DateTimeField: | |||
datetime, err := docF.DateTime() | |||
if err == nil { | |||
value = datetime.Format(time.RFC3339) | |||
} | |||
case *document.BooleanField: | |||
boolean, err := docF.Boolean() | |||
if err == nil { | |||
value = boolean | |||
} | |||
case *document.GeoPointField: | |||
lon, err := docF.Lon() | |||
if err == nil { | |||
lat, err := docF.Lat() | |||
if err == nil { | |||
value = []float64{lon, lat} | |||
} | |||
} | |||
} | |||
if value != nil { | |||
hit.AddFieldValue(docF.Name(), value) | |||
} | |||
} | |||
} | |||
} | |||
} | |||
if highlighter != nil { | |||
highlightFields := req.Highlight.Fields | |||
if highlightFields == nil { | |||
// add all fields with matches | |||
highlightFields = make([]string, 0, len(hit.Locations)) | |||
for k := range hit.Locations { | |||
highlightFields = append(highlightFields, k) | |||
} | |||
} | |||
for _, hf := range highlightFields { | |||
highlighter.BestFragmentsInField(hit, doc, hf, 1) | |||
} | |||
} | |||
} else if doc == nil { | |||
// unexpected case, a doc ID that was found as a search hit | |||
// was unable to be found during document lookup | |||
return ErrorIndexReadInconsistency | |||
} | |||
} | |||
return nil | |||
} | |||
// Fields returns the name of all the fields this | |||
// Index has operated on. | |||
func (i *indexImpl) Fields() (fields []string, err error) { | |||
@@ -854,3 +890,26 @@ func deDuplicate(fields []string) []string { | |||
} | |||
return ret | |||
} | |||
type searchHitSorter struct { | |||
hits search.DocumentMatchCollection | |||
sort search.SortOrder | |||
cachedScoring []bool | |||
cachedDesc []bool | |||
} | |||
func newSearchHitSorter(sort search.SortOrder, hits search.DocumentMatchCollection) *searchHitSorter { | |||
return &searchHitSorter{ | |||
sort: sort, | |||
hits: hits, | |||
cachedScoring: sort.CacheIsScore(), | |||
cachedDesc: sort.CacheDescending(), | |||
} | |||
} | |||
func (m *searchHitSorter) Len() int { return len(m.hits) } | |||
func (m *searchHitSorter) Swap(i, j int) { m.hits[i], m.hits[j] = m.hits[j], m.hits[i] } | |||
func (m *searchHitSorter) Less(i, j int) bool { | |||
c := m.sort.Compare(m.cachedScoring, m.cachedDesc, m.hits[i], m.hits[j]) | |||
return c < 0 | |||
} |
@@ -525,19 +525,27 @@ func (dm *DocumentMapping) processProperty(property interface{}, path []string, | |||
if !propertyValue.IsNil() { | |||
switch property := property.(type) { | |||
case encoding.TextMarshaler: | |||
txt, err := property.MarshalText() | |||
if err == nil && subDocMapping != nil { | |||
// index by explicit mapping | |||
// ONLY process TextMarshaler if there is an explicit mapping | |||
// AND all of the fields are of type text | |||
// OTHERWISE process field without TextMarshaler | |||
if subDocMapping != nil { | |||
allFieldsText := true | |||
for _, fieldMapping := range subDocMapping.Fields { | |||
if fieldMapping.Type == "text" { | |||
fieldMapping.processString(string(txt), pathString, path, indexes, context) | |||
if fieldMapping.Type != "text" { | |||
allFieldsText = false | |||
break | |||
} | |||
} | |||
} else { | |||
dm.walkDocument(property, path, indexes, context) | |||
txt, err := property.MarshalText() | |||
if err == nil && allFieldsText { | |||
txtStr := string(txt) | |||
for _, fieldMapping := range subDocMapping.Fields { | |||
fieldMapping.processString(txtStr, pathString, path, indexes, context) | |||
} | |||
return | |||
} | |||
} | |||
dm.walkDocument(property, path, indexes, context) | |||
default: | |||
dm.walkDocument(property, path, indexes, context) | |||
} |
@@ -23,12 +23,26 @@ const ShiftStartInt64 byte = 0x20 | |||
type PrefixCoded []byte | |||
func NewPrefixCodedInt64(in int64, shift uint) (PrefixCoded, error) { | |||
rv, _, err := NewPrefixCodedInt64Prealloc(in, shift, nil) | |||
return rv, err | |||
} | |||
func NewPrefixCodedInt64Prealloc(in int64, shift uint, prealloc []byte) ( | |||
rv PrefixCoded, preallocRest []byte, err error) { | |||
if shift > 63 { | |||
return nil, fmt.Errorf("cannot shift %d, must be between 0 and 63", shift) | |||
return nil, prealloc, fmt.Errorf("cannot shift %d, must be between 0 and 63", shift) | |||
} | |||
nChars := ((63 - shift) / 7) + 1 | |||
rv := make(PrefixCoded, nChars+1) | |||
size := int(nChars + 1) | |||
if len(prealloc) >= size { | |||
rv = PrefixCoded(prealloc[0:size]) | |||
preallocRest = prealloc[size:] | |||
} else { | |||
rv = make(PrefixCoded, size) | |||
} | |||
rv[0] = ShiftStartInt64 + byte(shift) | |||
sortableBits := int64(uint64(in) ^ 0x8000000000000000) | |||
@@ -40,7 +54,8 @@ func NewPrefixCodedInt64(in int64, shift uint) (PrefixCoded, error) { | |||
nChars-- | |||
sortableBits = int64(uint64(sortableBits) >> 7) | |||
} | |||
return rv, nil | |||
return rv, preallocRest, nil | |||
} | |||
func MustNewPrefixCodedInt64(in int64, shift uint) PrefixCoded { |
@@ -262,6 +262,8 @@ func (h *HighlightRequest) AddField(field string) { | |||
// result score explanations. | |||
// Sort describes the desired order for the results to be returned. | |||
// Score controls the kind of scoring performed | |||
// SearchAfter supports deep paging by providing a minimum sort key | |||
// SearchBefore supports deep paging by providing a maximum sort key | |||
// | |||
// A special field named "*" can be used to return all fields. | |||
type SearchRequest struct { | |||
@@ -275,6 +277,8 @@ type SearchRequest struct { | |||
Sort search.SortOrder `json:"sort"` | |||
IncludeLocations bool `json:"includeLocations"` | |||
Score string `json:"score,omitempty"` | |||
SearchAfter []string `json:"search_after"` | |||
SearchBefore []string `json:"search_before"` | |||
} | |||
func (r *SearchRequest) Validate() error { | |||
@@ -285,6 +289,27 @@ func (r *SearchRequest) Validate() error { | |||
} | |||
} | |||
if r.SearchAfter != nil && r.SearchBefore != nil { | |||
return fmt.Errorf("cannot use search after and search before together") | |||
} | |||
if r.SearchAfter != nil { | |||
if r.From != 0 { | |||
return fmt.Errorf("cannot use search after with from !=0") | |||
} | |||
if len(r.SearchAfter) != len(r.Sort) { | |||
return fmt.Errorf("search after must have same size as sort order") | |||
} | |||
} | |||
if r.SearchBefore != nil { | |||
if r.From != 0 { | |||
return fmt.Errorf("cannot use search before with from !=0") | |||
} | |||
if len(r.SearchBefore) != len(r.Sort) { | |||
return fmt.Errorf("search before must have same size as sort order") | |||
} | |||
} | |||
return r.Facets.Validate() | |||
} | |||
@@ -311,6 +336,18 @@ func (r *SearchRequest) SortByCustom(order search.SortOrder) { | |||
r.Sort = order | |||
} | |||
// SetSearchAfter sets the request to skip over hits with a sort | |||
// value less than the provided sort after key | |||
func (r *SearchRequest) SetSearchAfter(after []string) { | |||
r.SearchAfter = after | |||
} | |||
// SetSearchBefore sets the request to skip over hits with a sort | |||
// value greater than the provided sort before key | |||
func (r *SearchRequest) SetSearchBefore(before []string) { | |||
r.SearchBefore = before | |||
} | |||
// UnmarshalJSON deserializes a JSON representation of | |||
// a SearchRequest | |||
func (r *SearchRequest) UnmarshalJSON(input []byte) error { | |||
@@ -325,6 +362,8 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error { | |||
Sort []json.RawMessage `json:"sort"` | |||
IncludeLocations bool `json:"includeLocations"` | |||
Score string `json:"score"` | |||
SearchAfter []string `json:"search_after"` | |||
SearchBefore []string `json:"search_before"` | |||
} | |||
err := json.Unmarshal(input, &temp) | |||
@@ -352,6 +391,8 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error { | |||
r.Facets = temp.Facets | |||
r.IncludeLocations = temp.IncludeLocations | |||
r.Score = temp.Score | |||
r.SearchAfter = temp.SearchAfter | |||
r.SearchBefore = temp.SearchBefore | |||
r.Query, err = query.ParseQuery(temp.Q) | |||
if err != nil { | |||
return err |
@@ -69,6 +69,7 @@ type TopNCollector struct { | |||
lowestMatchOutsideResults *search.DocumentMatch | |||
updateFieldVisitor index.DocumentFieldTermVisitor | |||
dvReader index.DocValueReader | |||
searchAfter *search.DocumentMatch | |||
} | |||
// CheckDoneEvery controls how frequently we check the context deadline | |||
@@ -78,6 +79,21 @@ const CheckDoneEvery = uint64(1024) | |||
// skipping over the first 'skip' hits | |||
// ordering hits by the provided sort order | |||
func NewTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector { | |||
return newTopNCollector(size, skip, sort) | |||
} | |||
// NewTopNCollector builds a collector to find the top 'size' hits | |||
// skipping over the first 'skip' hits | |||
// ordering hits by the provided sort order | |||
func NewTopNCollectorAfter(size int, sort search.SortOrder, after []string) *TopNCollector { | |||
rv := newTopNCollector(size, 0, sort) | |||
rv.searchAfter = &search.DocumentMatch{ | |||
Sort: after, | |||
} | |||
return rv | |||
} | |||
func newTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector { | |||
hc := &TopNCollector{size: size, skip: skip, sort: sort} | |||
// pre-allocate space on the store to avoid reslicing | |||
@@ -141,6 +157,7 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher, | |||
searchContext := &search.SearchContext{ | |||
DocumentMatchPool: search.NewDocumentMatchPool(backingSize+searcher.DocumentMatchPoolSize(), len(hc.sort)), | |||
Collector: hc, | |||
IndexReader: reader, | |||
} | |||
hc.dvReader, err = reader.DocValueReader(hc.neededFields) | |||
@@ -265,6 +282,19 @@ func MakeTopNDocumentMatchHandler( | |||
if d == nil { | |||
return nil | |||
} | |||
// support search after based pagination, | |||
// if this hit is <= the search after sort key | |||
// we should skip it | |||
if hc.searchAfter != nil { | |||
// exact sort order matches use hit number to break tie | |||
// but we want to allow for exact match, so we pretend | |||
hc.searchAfter.HitNumber = d.HitNumber | |||
if hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, d, hc.searchAfter) <= 0 { | |||
return nil | |||
} | |||
} | |||
// optimization, we track lowest sorting hit already removed from heap | |||
// with this one comparison, we can avoid all heap operations if | |||
// this hit would have been added and then immediately removed |
@@ -41,6 +41,14 @@ type BleveQueryTime struct { | |||
time.Time | |||
} | |||
// MinRFC3339CompatibleTime and MaxRFC3339CompatibleTime are the earliest and
// latest datetimes accepted by isDatetimeCompatible when the query datetime
// format is RFC3339.
// NOTE(review): presumably chosen to stay inside the range that
// Time.UnixNano can represent as an int64 — confirm.
var (
	MinRFC3339CompatibleTime = time.Date(1677, time.December, 1, 0, 0, 0, 0, time.UTC)
	MaxRFC3339CompatibleTime = time.Date(2262, time.April, 11, 11, 59, 59, 0, time.UTC)
)
func queryTimeFromString(t string) (time.Time, error) { | |||
dateTimeParser, err := cache.DateTimeParserNamed(QueryDateTimeParser) | |||
if err != nil { | |||
@@ -143,10 +151,20 @@ func (q *DateRangeQuery) parseEndpoints() (*float64, *float64, error) { | |||
min := math.Inf(-1) | |||
max := math.Inf(1) | |||
if !q.Start.IsZero() { | |||
min = numeric.Int64ToFloat64(q.Start.UnixNano()) | |||
if !isDatetimeCompatible(q.Start) { | |||
// overflow | |||
return nil, nil, fmt.Errorf("invalid/unsupported date range, start: %v", q.Start) | |||
} | |||
startInt64 := q.Start.UnixNano() | |||
min = numeric.Int64ToFloat64(startInt64) | |||
} | |||
if !q.End.IsZero() { | |||
max = numeric.Int64ToFloat64(q.End.UnixNano()) | |||
if !isDatetimeCompatible(q.End) { | |||
// overflow | |||
return nil, nil, fmt.Errorf("invalid/unsupported date range, end: %v", q.End) | |||
} | |||
endInt64 := q.End.UnixNano() | |||
max = numeric.Int64ToFloat64(endInt64) | |||
} | |||
return &min, &max, nil | |||
@@ -162,3 +180,12 @@ func (q *DateRangeQuery) Validate() error { | |||
} | |||
return nil | |||
} | |||
func isDatetimeCompatible(t BleveQueryTime) bool { | |||
if QueryDateTimeFormat == time.RFC3339 && | |||
(t.Before(MinRFC3339CompatibleTime) || t.After(MaxRFC3339CompatibleTime)) { | |||
return false | |||
} | |||
return true | |||
} |
@@ -80,12 +80,6 @@ func (q *DisjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, | |||
if len(ss) < 1 { | |||
return searcher.NewMatchNoneSearcher(i) | |||
} else if len(ss) == 1 && int(q.Min) == ss[0].Min() { | |||
// apply optimization only if both conditions below are satisfied: | |||
// - disjunction searcher has only 1 child searcher | |||
// - parent searcher's min setting is equal to child searcher's min | |||
return ss[0], nil | |||
} | |||
return searcher.NewDisjunctionSearcher(i, ss, q.Min, options) |
@@ -0,0 +1,94 @@ | |||
// Copyright (c) 2019 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package query | |||
import ( | |||
"encoding/json" | |||
"fmt" | |||
"github.com/blevesearch/bleve/geo" | |||
"github.com/blevesearch/bleve/index" | |||
"github.com/blevesearch/bleve/mapping" | |||
"github.com/blevesearch/bleve/search" | |||
"github.com/blevesearch/bleve/search/searcher" | |||
) | |||
type GeoBoundingPolygonQuery struct { | |||
Points []geo.Point `json:"polygon_points"` | |||
FieldVal string `json:"field,omitempty"` | |||
BoostVal *Boost `json:"boost,omitempty"` | |||
} | |||
func NewGeoBoundingPolygonQuery(points []geo.Point) *GeoBoundingPolygonQuery { | |||
return &GeoBoundingPolygonQuery{ | |||
Points: points} | |||
} | |||
func (q *GeoBoundingPolygonQuery) SetBoost(b float64) { | |||
boost := Boost(b) | |||
q.BoostVal = &boost | |||
} | |||
func (q *GeoBoundingPolygonQuery) Boost() float64 { | |||
return q.BoostVal.Value() | |||
} | |||
func (q *GeoBoundingPolygonQuery) SetField(f string) { | |||
q.FieldVal = f | |||
} | |||
func (q *GeoBoundingPolygonQuery) Field() string { | |||
return q.FieldVal | |||
} | |||
func (q *GeoBoundingPolygonQuery) Searcher(i index.IndexReader, | |||
m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { | |||
field := q.FieldVal | |||
if q.FieldVal == "" { | |||
field = m.DefaultSearchField() | |||
} | |||
return searcher.NewGeoBoundedPolygonSearcher(i, q.Points, field, q.BoostVal.Value(), options) | |||
} | |||
func (q *GeoBoundingPolygonQuery) Validate() error { | |||
return nil | |||
} | |||
func (q *GeoBoundingPolygonQuery) UnmarshalJSON(data []byte) error { | |||
tmp := struct { | |||
Points []interface{} `json:"polygon_points"` | |||
FieldVal string `json:"field,omitempty"` | |||
BoostVal *Boost `json:"boost,omitempty"` | |||
}{} | |||
err := json.Unmarshal(data, &tmp) | |||
if err != nil { | |||
return err | |||
} | |||
q.Points = make([]geo.Point, 0, len(tmp.Points)) | |||
for _, i := range tmp.Points { | |||
// now use our generic point parsing code from the geo package | |||
lon, lat, found := geo.ExtractGeoPoint(i) | |||
if !found { | |||
return fmt.Errorf("geo polygon point: %v is not in a valid format", i) | |||
} | |||
q.Points = append(q.Points, geo.Point{Lon: lon, Lat: lat}) | |||
} | |||
q.FieldVal = tmp.FieldVal | |||
q.BoostVal = tmp.BoostVal | |||
return nil | |||
} |
@@ -273,6 +273,15 @@ func ParseQuery(input []byte) (Query, error) { | |||
} | |||
return &rv, nil | |||
} | |||
_, hasPoints := tmp["polygon_points"] | |||
if hasPoints { | |||
var rv GeoBoundingPolygonQuery | |||
err := json.Unmarshal(input, &rv) | |||
if err != nil { | |||
return nil, err | |||
} | |||
return &rv, nil | |||
} | |||
return nil, fmt.Errorf("unknown query type") | |||
} | |||
@@ -40,6 +40,7 @@ type TermQueryScorer struct { | |||
idf float64 | |||
options search.SearcherOptions | |||
idfExplanation *search.Explanation | |||
includeScore bool | |||
queryNorm float64 | |||
queryWeight float64 | |||
queryWeightExplanation *search.Explanation | |||
@@ -62,14 +63,15 @@ func (s *TermQueryScorer) Size() int { | |||
func NewTermQueryScorer(queryTerm []byte, queryField string, queryBoost float64, docTotal, docTerm uint64, options search.SearcherOptions) *TermQueryScorer { | |||
rv := TermQueryScorer{ | |||
queryTerm: string(queryTerm), | |||
queryField: queryField, | |||
queryBoost: queryBoost, | |||
docTerm: docTerm, | |||
docTotal: docTotal, | |||
idf: 1.0 + math.Log(float64(docTotal)/float64(docTerm+1.0)), | |||
options: options, | |||
queryWeight: 1.0, | |||
queryTerm: string(queryTerm), | |||
queryField: queryField, | |||
queryBoost: queryBoost, | |||
docTerm: docTerm, | |||
docTotal: docTotal, | |||
idf: 1.0 + math.Log(float64(docTotal)/float64(docTerm+1.0)), | |||
options: options, | |||
queryWeight: 1.0, | |||
includeScore: options.Score != "none", | |||
} | |||
if options.Explain { | |||
@@ -113,56 +115,61 @@ func (s *TermQueryScorer) SetQueryNorm(qnorm float64) { | |||
} | |||
func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.TermFieldDoc) *search.DocumentMatch { | |||
var scoreExplanation *search.Explanation | |||
// need to compute score | |||
var tf float64 | |||
if termMatch.Freq < MaxSqrtCache { | |||
tf = SqrtCache[int(termMatch.Freq)] | |||
} else { | |||
tf = math.Sqrt(float64(termMatch.Freq)) | |||
} | |||
score := tf * termMatch.Norm * s.idf | |||
if s.options.Explain { | |||
childrenExplanations := make([]*search.Explanation, 3) | |||
childrenExplanations[0] = &search.Explanation{ | |||
Value: tf, | |||
Message: fmt.Sprintf("tf(termFreq(%s:%s)=%d", s.queryField, s.queryTerm, termMatch.Freq), | |||
} | |||
childrenExplanations[1] = &search.Explanation{ | |||
Value: termMatch.Norm, | |||
Message: fmt.Sprintf("fieldNorm(field=%s, doc=%s)", s.queryField, termMatch.ID), | |||
} | |||
childrenExplanations[2] = s.idfExplanation | |||
scoreExplanation = &search.Explanation{ | |||
Value: score, | |||
Message: fmt.Sprintf("fieldWeight(%s:%s in %s), product of:", s.queryField, s.queryTerm, termMatch.ID), | |||
Children: childrenExplanations, | |||
rv := ctx.DocumentMatchPool.Get() | |||
// perform any score computations only when needed | |||
if s.includeScore || s.options.Explain { | |||
var scoreExplanation *search.Explanation | |||
var tf float64 | |||
if termMatch.Freq < MaxSqrtCache { | |||
tf = SqrtCache[int(termMatch.Freq)] | |||
} else { | |||
tf = math.Sqrt(float64(termMatch.Freq)) | |||
} | |||
} | |||
score := tf * termMatch.Norm * s.idf | |||
// if the query weight isn't 1, multiply | |||
if s.queryWeight != 1.0 { | |||
score = score * s.queryWeight | |||
if s.options.Explain { | |||
childExplanations := make([]*search.Explanation, 2) | |||
childExplanations[0] = s.queryWeightExplanation | |||
childExplanations[1] = scoreExplanation | |||
childrenExplanations := make([]*search.Explanation, 3) | |||
childrenExplanations[0] = &search.Explanation{ | |||
Value: tf, | |||
Message: fmt.Sprintf("tf(termFreq(%s:%s)=%d", s.queryField, s.queryTerm, termMatch.Freq), | |||
} | |||
childrenExplanations[1] = &search.Explanation{ | |||
Value: termMatch.Norm, | |||
Message: fmt.Sprintf("fieldNorm(field=%s, doc=%s)", s.queryField, termMatch.ID), | |||
} | |||
childrenExplanations[2] = s.idfExplanation | |||
scoreExplanation = &search.Explanation{ | |||
Value: score, | |||
Message: fmt.Sprintf("weight(%s:%s^%f in %s), product of:", s.queryField, s.queryTerm, s.queryBoost, termMatch.ID), | |||
Children: childExplanations, | |||
Message: fmt.Sprintf("fieldWeight(%s:%s in %s), product of:", s.queryField, s.queryTerm, termMatch.ID), | |||
Children: childrenExplanations, | |||
} | |||
} | |||
// if the query weight isn't 1, multiply | |||
if s.queryWeight != 1.0 { | |||
score = score * s.queryWeight | |||
if s.options.Explain { | |||
childExplanations := make([]*search.Explanation, 2) | |||
childExplanations[0] = s.queryWeightExplanation | |||
childExplanations[1] = scoreExplanation | |||
scoreExplanation = &search.Explanation{ | |||
Value: score, | |||
Message: fmt.Sprintf("weight(%s:%s^%f in %s), product of:", s.queryField, s.queryTerm, s.queryBoost, termMatch.ID), | |||
Children: childExplanations, | |||
} | |||
} | |||
} | |||
if s.includeScore { | |||
rv.Score = score | |||
} | |||
if s.options.Explain { | |||
rv.Expl = scoreExplanation | |||
} | |||
} | |||
rv := ctx.DocumentMatchPool.Get() | |||
rv.IndexInternalID = append(rv.IndexInternalID, termMatch.ID...) | |||
rv.Score = score | |||
if s.options.Explain { | |||
rv.Expl = scoreExplanation | |||
} | |||
if len(termMatch.Vectors) > 0 { | |||
if cap(rv.FieldTermLocations) < len(termMatch.Vectors) { |
@@ -17,6 +17,7 @@ package search | |||
import ( | |||
"fmt" | |||
"reflect" | |||
"sort" | |||
"github.com/blevesearch/bleve/index" | |||
"github.com/blevesearch/bleve/size" | |||
@@ -49,6 +50,24 @@ func (ap ArrayPositions) Equals(other ArrayPositions) bool { | |||
return true | |||
} | |||
func (ap ArrayPositions) Compare(other ArrayPositions) int { | |||
for i, p := range ap { | |||
if i >= len(other) { | |||
return 1 | |||
} | |||
if p < other[i] { | |||
return -1 | |||
} | |||
if p > other[i] { | |||
return 1 | |||
} | |||
} | |||
if len(ap) < len(other) { | |||
return -1 | |||
} | |||
return 0 | |||
} | |||
type Location struct { | |||
// Pos is the position of the term within the field, starting at 1 | |||
Pos uint64 `json:"pos"` | |||
@@ -68,6 +87,46 @@ func (l *Location) Size() int { | |||
type Locations []*Location | |||
func (p Locations) Len() int { return len(p) } | |||
func (p Locations) Swap(i, j int) { p[i], p[j] = p[j], p[i] } | |||
func (p Locations) Less(i, j int) bool { | |||
c := p[i].ArrayPositions.Compare(p[j].ArrayPositions) | |||
if c < 0 { | |||
return true | |||
} | |||
if c > 0 { | |||
return false | |||
} | |||
return p[i].Pos < p[j].Pos | |||
} | |||
func (p Locations) Dedupe() Locations { // destructive! | |||
if len(p) <= 1 { | |||
return p | |||
} | |||
sort.Sort(p) | |||
slow := 0 | |||
for _, pfast := range p { | |||
pslow := p[slow] | |||
if pslow.Pos == pfast.Pos && | |||
pslow.Start == pfast.Start && | |||
pslow.End == pfast.End && | |||
pslow.ArrayPositions.Equals(pfast.ArrayPositions) { | |||
continue // duplicate, so only move fast ahead | |||
} | |||
slow++ | |||
p[slow] = pfast | |||
} | |||
return p[:slow+1] | |||
} | |||
type TermLocationMap map[string]Locations | |||
func (t TermLocationMap) AddLocation(term string, location *Location) { | |||
@@ -208,6 +267,7 @@ func (dm *DocumentMatch) Complete(prealloc []Location) []Location { | |||
var lastField string | |||
var tlm TermLocationMap | |||
var needsDedupe bool | |||
for i, ftl := range dm.FieldTermLocations { | |||
if lastField != ftl.Field { | |||
@@ -231,7 +291,19 @@ func (dm *DocumentMatch) Complete(prealloc []Location) []Location { | |||
loc.ArrayPositions = append(ArrayPositions(nil), loc.ArrayPositions...) | |||
} | |||
tlm[ftl.Term] = append(tlm[ftl.Term], loc) | |||
locs := tlm[ftl.Term] | |||
// if the loc is before or at the last location, then there | |||
// might be duplicates that need to be deduplicated | |||
if !needsDedupe && len(locs) > 0 { | |||
last := locs[len(locs)-1] | |||
cmp := loc.ArrayPositions.Compare(last.ArrayPositions) | |||
if cmp < 0 || (cmp == 0 && loc.Pos <= last.Pos) { | |||
needsDedupe = true | |||
} | |||
} | |||
tlm[ftl.Term] = append(locs, loc) | |||
dm.FieldTermLocations[i] = FieldTermLocation{ // recycle | |||
Location: Location{ | |||
@@ -239,6 +311,14 @@ func (dm *DocumentMatch) Complete(prealloc []Location) []Location { | |||
}, | |||
} | |||
} | |||
if needsDedupe { | |||
for _, tlm := range dm.Locations { | |||
for term, locs := range tlm { | |||
tlm[term] = locs.Dedupe() | |||
} | |||
} | |||
} | |||
} | |||
dm.FieldTermLocations = dm.FieldTermLocations[:0] // recycle | |||
@@ -279,6 +359,7 @@ type SearcherOptions struct { | |||
type SearchContext struct { | |||
DocumentMatchPool *DocumentMatchPool | |||
Collector Collector | |||
IndexReader index.IndexReader | |||
} | |||
func (sc *SearchContext) Size() int { |
@@ -45,6 +45,7 @@ type BooleanSearcher struct { | |||
scorer *scorer.ConjunctionQueryScorer | |||
matches []*search.DocumentMatch | |||
initialized bool | |||
done bool | |||
} | |||
func NewBooleanSearcher(indexReader index.IndexReader, mustSearcher search.Searcher, shouldSearcher search.Searcher, mustNotSearcher search.Searcher, options search.SearcherOptions) (*BooleanSearcher, error) { | |||
@@ -207,6 +208,10 @@ func (s *BooleanSearcher) SetQueryNorm(qnorm float64) { | |||
func (s *BooleanSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) { | |||
if s.done { | |||
return nil, nil | |||
} | |||
if !s.initialized { | |||
err := s.initSearchers(ctx) | |||
if err != nil { | |||
@@ -320,11 +325,19 @@ func (s *BooleanSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch | |||
} | |||
} | |||
if rv == nil { | |||
s.done = true | |||
} | |||
return rv, nil | |||
} | |||
func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) { | |||
if s.done { | |||
return nil, nil | |||
} | |||
if !s.initialized { | |||
err := s.initSearchers(ctx) | |||
if err != nil { | |||
@@ -332,14 +345,8 @@ func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInter | |||
} | |||
} | |||
// Advance the searchers only if the currentID cursor is trailing the lookup ID, | |||
// additionally if the mustNotSearcher has been initialized, ensure that the | |||
// cursor used to track the mustNotSearcher (currMustNot, which isn't tracked by | |||
// currentID) is trailing the lookup ID as well - for in the case where currentID | |||
// is nil and currMustNot is already at or ahead of the lookup ID, we MUST NOT | |||
// advance the currentID or the currMustNot cursors. | |||
if (s.currentID == nil || s.currentID.Compare(ID) < 0) && | |||
(s.currMustNot == nil || s.currMustNot.IndexInternalID.Compare(ID) < 0) { | |||
// Advance the searcher only if the cursor is trailing the lookup ID | |||
if s.currentID == nil || s.currentID.Compare(ID) < 0 { | |||
var err error | |||
if s.mustSearcher != nil { | |||
if s.currMust != nil { | |||
@@ -362,12 +369,17 @@ func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInter | |||
} | |||
if s.mustNotSearcher != nil { | |||
if s.currMustNot != nil { | |||
ctx.DocumentMatchPool.Put(s.currMustNot) | |||
} | |||
s.currMustNot, err = s.mustNotSearcher.Advance(ctx, ID) | |||
if err != nil { | |||
return nil, err | |||
// Additional check for mustNotSearcher, whose cursor isn't tracked by | |||
// currentID to prevent it from moving when the searcher's tracked | |||
// position is already ahead of or at the requested ID. | |||
if s.currMustNot == nil || s.currMustNot.IndexInternalID.Compare(ID) < 0 { | |||
if s.currMustNot != nil { | |||
ctx.DocumentMatchPool.Put(s.currMustNot) | |||
} | |||
s.currMustNot, err = s.mustNotSearcher.Advance(ctx, ID) | |||
if err != nil { | |||
return nil, err | |||
} | |||
} | |||
} | |||
@@ -22,6 +22,11 @@ import ( | |||
"github.com/blevesearch/bleve/search" | |||
) | |||
type filterFunc func(key []byte) bool | |||
var GeoBitsShift1 = (geo.GeoBits << 1) | |||
var GeoBitsShift1Minus1 = GeoBitsShift1 - 1 | |||
func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat, | |||
maxLon, maxLat float64, field string, boost float64, | |||
options search.SearcherOptions, checkBoundaries bool) ( | |||
@@ -36,8 +41,11 @@ func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat, | |||
} | |||
// do math to produce list of terms needed for this search | |||
onBoundaryTerms, notOnBoundaryTerms := ComputeGeoRange(0, (geo.GeoBits<<1)-1, | |||
minLon, minLat, maxLon, maxLat, checkBoundaries) | |||
onBoundaryTerms, notOnBoundaryTerms, err := ComputeGeoRange(0, GeoBitsShift1Minus1, | |||
minLon, minLat, maxLon, maxLat, checkBoundaries, indexReader, field) | |||
if err != nil { | |||
return nil, err | |||
} | |||
var onBoundarySearcher search.Searcher | |||
dvReader, err := indexReader.DocValueReader([]string{field}) | |||
@@ -94,59 +102,123 @@ var geoMaxShift = document.GeoPrecisionStep * 4 | |||
var geoDetailLevel = ((geo.GeoBits << 1) - geoMaxShift) / 2 | |||
func ComputeGeoRange(term uint64, shift uint, | |||
sminLon, sminLat, smaxLon, smaxLat float64, | |||
checkBoundaries bool) ( | |||
onBoundary [][]byte, notOnBoundary [][]byte) { | |||
split := term | uint64(0x1)<<shift | |||
var upperMax uint64 | |||
if shift < 63 { | |||
upperMax = term | ((uint64(1) << (shift + 1)) - 1) | |||
} else { | |||
upperMax = 0xffffffffffffffff | |||
sminLon, sminLat, smaxLon, smaxLat float64, checkBoundaries bool, | |||
indexReader index.IndexReader, field string) ( | |||
onBoundary [][]byte, notOnBoundary [][]byte, err error) { | |||
preallocBytesLen := 32 | |||
preallocBytes := make([]byte, preallocBytesLen) | |||
makePrefixCoded := func(in int64, shift uint) (rv numeric.PrefixCoded) { | |||
if len(preallocBytes) <= 0 { | |||
preallocBytesLen = preallocBytesLen * 2 | |||
preallocBytes = make([]byte, preallocBytesLen) | |||
} | |||
rv, preallocBytes, err = | |||
numeric.NewPrefixCodedInt64Prealloc(in, shift, preallocBytes) | |||
return rv | |||
} | |||
var fieldDict index.FieldDictContains | |||
var isIndexed filterFunc | |||
if irr, ok := indexReader.(index.IndexReaderContains); ok { | |||
fieldDict, err = irr.FieldDictContains(field) | |||
if err != nil { | |||
return nil, nil, err | |||
} | |||
isIndexed = func(term []byte) bool { | |||
found, err := fieldDict.Contains(term) | |||
return err == nil && found | |||
} | |||
} | |||
lowerMax := split - 1 | |||
onBoundary, notOnBoundary = relateAndRecurse(term, lowerMax, shift, | |||
sminLon, sminLat, smaxLon, smaxLat, checkBoundaries) | |||
plusOnBoundary, plusNotOnBoundary := relateAndRecurse(split, upperMax, shift, | |||
sminLon, sminLat, smaxLon, smaxLat, checkBoundaries) | |||
onBoundary = append(onBoundary, plusOnBoundary...) | |||
notOnBoundary = append(notOnBoundary, plusNotOnBoundary...) | |||
return | |||
} | |||
func relateAndRecurse(start, end uint64, res uint, | |||
sminLon, sminLat, smaxLon, smaxLat float64, | |||
checkBoundaries bool) ( | |||
onBoundary [][]byte, notOnBoundary [][]byte) { | |||
minLon := geo.MortonUnhashLon(start) | |||
minLat := geo.MortonUnhashLat(start) | |||
maxLon := geo.MortonUnhashLon(end) | |||
maxLat := geo.MortonUnhashLat(end) | |||
level := ((geo.GeoBits << 1) - res) >> 1 | |||
within := res%document.GeoPrecisionStep == 0 && | |||
geo.RectWithin(minLon, minLat, maxLon, maxLat, | |||
sminLon, sminLat, smaxLon, smaxLat) | |||
if within || (level == geoDetailLevel && | |||
geo.RectIntersects(minLon, minLat, maxLon, maxLat, | |||
sminLon, sminLat, smaxLon, smaxLat)) { | |||
if !within && checkBoundaries { | |||
return [][]byte{ | |||
numeric.MustNewPrefixCodedInt64(int64(start), res), | |||
}, nil | |||
defer func() { | |||
if fieldDict != nil { | |||
if fd, ok := fieldDict.(index.FieldDict); ok { | |||
cerr := fd.Close() | |||
if cerr != nil { | |||
err = cerr | |||
} | |||
} | |||
} | |||
return nil, | |||
[][]byte{ | |||
numeric.MustNewPrefixCodedInt64(int64(start), res), | |||
}() | |||
if isIndexed == nil { | |||
isIndexed = func(term []byte) bool { | |||
if indexReader != nil { | |||
reader, err := indexReader.TermFieldReader(term, field, false, false, false) | |||
if err != nil || reader == nil { | |||
return false | |||
} | |||
if reader.Count() == 0 { | |||
_ = reader.Close() | |||
return false | |||
} | |||
_ = reader.Close() | |||
} | |||
} else if level < geoDetailLevel && | |||
geo.RectIntersects(minLon, minLat, maxLon, maxLat, | |||
sminLon, sminLat, smaxLon, smaxLat) { | |||
return ComputeGeoRange(start, res-1, sminLon, sminLat, smaxLon, smaxLat, | |||
checkBoundaries) | |||
return true | |||
} | |||
} | |||
return nil, nil | |||
var computeGeoRange func(term uint64, shift uint) // declare for recursion | |||
relateAndRecurse := func(start, end uint64, res, level uint) { | |||
minLon := geo.MortonUnhashLon(start) | |||
minLat := geo.MortonUnhashLat(start) | |||
maxLon := geo.MortonUnhashLon(end) | |||
maxLat := geo.MortonUnhashLat(end) | |||
within := res%document.GeoPrecisionStep == 0 && | |||
geo.RectWithin(minLon, minLat, maxLon, maxLat, | |||
sminLon, sminLat, smaxLon, smaxLat) | |||
if within || (level == geoDetailLevel && | |||
geo.RectIntersects(minLon, minLat, maxLon, maxLat, | |||
sminLon, sminLat, smaxLon, smaxLat)) { | |||
codedTerm := makePrefixCoded(int64(start), res) | |||
if isIndexed(codedTerm) { | |||
if !within && checkBoundaries { | |||
onBoundary = append(onBoundary, codedTerm) | |||
} else { | |||
notOnBoundary = append(notOnBoundary, codedTerm) | |||
} | |||
} | |||
} else if level < geoDetailLevel && | |||
geo.RectIntersects(minLon, minLat, maxLon, maxLat, | |||
sminLon, sminLat, smaxLon, smaxLat) { | |||
computeGeoRange(start, res-1) | |||
} | |||
} | |||
computeGeoRange = func(term uint64, shift uint) { | |||
if err != nil { | |||
return | |||
} | |||
split := term | uint64(0x1)<<shift | |||
var upperMax uint64 | |||
if shift < 63 { | |||
upperMax = term | ((uint64(1) << (shift + 1)) - 1) | |||
} else { | |||
upperMax = 0xffffffffffffffff | |||
} | |||
lowerMax := split - 1 | |||
level := (GeoBitsShift1 - shift) >> 1 | |||
relateAndRecurse(term, lowerMax, shift, level) | |||
relateAndRecurse(split, upperMax, shift, level) | |||
} | |||
computeGeoRange(term, shift) | |||
if err != nil { | |||
return nil, nil, err | |||
} | |||
return onBoundary, notOnBoundary, err | |||
} | |||
func buildRectFilter(dvReader index.DocValueReader, field string, |
@@ -34,7 +34,7 @@ func NewGeoPointDistanceSearcher(indexReader index.IndexReader, centerLon, | |||
// build a searcher for the box | |||
boxSearcher, err := boxSearcher(indexReader, | |||
topLeftLon, topLeftLat, bottomRightLon, bottomRightLat, | |||
field, boost, options) | |||
field, boost, options, false) | |||
if err != nil { | |||
return nil, err | |||
} | |||
@@ -54,19 +54,20 @@ func NewGeoPointDistanceSearcher(indexReader index.IndexReader, centerLon, | |||
// two boxes joined through a disjunction searcher | |||
func boxSearcher(indexReader index.IndexReader, | |||
topLeftLon, topLeftLat, bottomRightLon, bottomRightLat float64, | |||
field string, boost float64, options search.SearcherOptions) ( | |||
field string, boost float64, options search.SearcherOptions, checkBoundaries bool) ( | |||
search.Searcher, error) { | |||
if bottomRightLon < topLeftLon { | |||
// cross date line, rewrite as two parts | |||
leftSearcher, err := NewGeoBoundingBoxSearcher(indexReader, | |||
-180, bottomRightLat, bottomRightLon, topLeftLat, | |||
field, boost, options, false) | |||
field, boost, options, checkBoundaries) | |||
if err != nil { | |||
return nil, err | |||
} | |||
rightSearcher, err := NewGeoBoundingBoxSearcher(indexReader, | |||
topLeftLon, bottomRightLat, 180, topLeftLat, field, boost, options, false) | |||
topLeftLon, bottomRightLat, 180, topLeftLat, field, boost, options, | |||
checkBoundaries) | |||
if err != nil { | |||
_ = leftSearcher.Close() | |||
return nil, err | |||
@@ -85,7 +86,7 @@ func boxSearcher(indexReader index.IndexReader, | |||
// build geoboundingbox searcher for that bounding box | |||
boxSearcher, err := NewGeoBoundingBoxSearcher(indexReader, | |||
topLeftLon, bottomRightLat, bottomRightLon, topLeftLat, field, boost, | |||
options, false) | |||
options, checkBoundaries) | |||
if err != nil { | |||
return nil, err | |||
} |
@@ -0,0 +1,110 @@ | |||
// Copyright (c) 2019 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package searcher | |||
import ( | |||
"github.com/blevesearch/bleve/geo" | |||
"github.com/blevesearch/bleve/index" | |||
"github.com/blevesearch/bleve/numeric" | |||
"github.com/blevesearch/bleve/search" | |||
"math" | |||
) | |||
func NewGeoBoundedPolygonSearcher(indexReader index.IndexReader, | |||
polygon []geo.Point, field string, boost float64, | |||
options search.SearcherOptions) (search.Searcher, error) { | |||
// compute the bounding box enclosing the polygon | |||
topLeftLon, topLeftLat, bottomRightLon, bottomRightLat, err := | |||
geo.BoundingRectangleForPolygon(polygon) | |||
if err != nil { | |||
return nil, err | |||
} | |||
// build a searcher for the bounding box on the polygon | |||
boxSearcher, err := boxSearcher(indexReader, | |||
topLeftLon, topLeftLat, bottomRightLon, bottomRightLat, | |||
field, boost, options, true) | |||
if err != nil { | |||
return nil, err | |||
} | |||
dvReader, err := indexReader.DocValueReader([]string{field}) | |||
if err != nil { | |||
return nil, err | |||
} | |||
// wrap it in a filtering searcher that checks for the polygon inclusivity | |||
return NewFilteringSearcher(boxSearcher, | |||
buildPolygonFilter(dvReader, field, polygon)), nil | |||
} | |||
// float64EqualityThreshold is the largest absolute difference at which
// almostEqual still treats two values as equal.
const float64EqualityThreshold = 1e-6

// almostEqual reports whether a and b differ by no more than
// float64EqualityThreshold.
func almostEqual(a, b float64) bool {
	delta := math.Abs(a - b)
	return delta <= float64EqualityThreshold
}
// buildPolygonFilter returns true if the point lies inside the | |||
// polygon. It is based on the ray-casting technique as referred | |||
// here: https://wrf.ecse.rpi.edu/nikola/pubdetails/pnpoly.html | |||
func buildPolygonFilter(dvReader index.DocValueReader, field string, | |||
polygon []geo.Point) FilterFunc { | |||
return func(d *search.DocumentMatch) bool { | |||
var lon, lat float64 | |||
var found bool | |||
err := dvReader.VisitDocValues(d.IndexInternalID, func(field string, term []byte) { | |||
// only consider the values which are shifted 0 | |||
prefixCoded := numeric.PrefixCoded(term) | |||
shift, err := prefixCoded.Shift() | |||
if err == nil && shift == 0 { | |||
i64, err := prefixCoded.Int64() | |||
if err == nil { | |||
lon = geo.MortonUnhashLon(uint64(i64)) | |||
lat = geo.MortonUnhashLat(uint64(i64)) | |||
found = true | |||
} | |||
} | |||
}) | |||
// Note: this approach works for points which are strictly inside | |||
// the polygon. ie it might fail for certain points on the polygon boundaries. | |||
if err == nil && found { | |||
nVertices := len(polygon) | |||
var inside bool | |||
// check for a direct vertex match | |||
if almostEqual(polygon[0].Lat, lat) && | |||
almostEqual(polygon[0].Lon, lon) { | |||
return true | |||
} | |||
for i := 1; i < nVertices; i++ { | |||
if almostEqual(polygon[i].Lat, lat) && | |||
almostEqual(polygon[i].Lon, lon) { | |||
return true | |||
} | |||
if (polygon[i].Lat > lat) != (polygon[i-1].Lat > lat) && | |||
lon < (polygon[i-1].Lon-polygon[i].Lon)*(lat-polygon[i].Lat)/ | |||
(polygon[i-1].Lat-polygon[i].Lat)+polygon[i].Lon { | |||
inside = !inside | |||
} | |||
} | |||
return inside | |||
} | |||
return false | |||
} | |||
} |
@@ -53,20 +53,49 @@ func NewNumericRangeSearcher(indexReader index.IndexReader, | |||
if !*inclusiveMax && maxInt64 != math.MinInt64 { | |||
maxInt64-- | |||
} | |||
var fieldDict index.FieldDictContains | |||
var isIndexed filterFunc | |||
var err error | |||
if irr, ok := indexReader.(index.IndexReaderContains); ok { | |||
fieldDict, err = irr.FieldDictContains(field) | |||
if err != nil { | |||
return nil, err | |||
} | |||
isIndexed = func(term []byte) bool { | |||
found, err := fieldDict.Contains(term) | |||
return err == nil && found | |||
} | |||
} | |||
// FIXME hard-coded precision, should match field declaration | |||
termRanges := splitInt64Range(minInt64, maxInt64, 4) | |||
terms := termRanges.Enumerate() | |||
terms := termRanges.Enumerate(isIndexed) | |||
if fieldDict != nil { | |||
if fd, ok := fieldDict.(index.FieldDict); ok { | |||
cerr := fd.Close() | |||
if cerr != nil { | |||
err = cerr | |||
} | |||
} | |||
} | |||
if len(terms) < 1 { | |||
// cannot return MatchNoneSearcher because of interaction with | |||
// commit f391b991c20f02681bacd197afc6d8aed444e132 | |||
return NewMultiTermSearcherBytes(indexReader, terms, field, boost, options, | |||
true) | |||
} | |||
var err error | |||
terms, err = filterCandidateTerms(indexReader, terms, field) | |||
if err != nil { | |||
return nil, err | |||
// for upside_down | |||
if isIndexed == nil { | |||
terms, err = filterCandidateTerms(indexReader, terms, field) | |||
if err != nil { | |||
return nil, err | |||
} | |||
} | |||
if tooManyClauses(len(terms)) { | |||
return nil, tooManyClausesErr(len(terms)) | |||
} | |||
@@ -125,11 +154,17 @@ type termRange struct { | |||
endTerm []byte | |||
} | |||
func (t *termRange) Enumerate() [][]byte { | |||
func (t *termRange) Enumerate(filter filterFunc) [][]byte { | |||
var rv [][]byte | |||
next := t.startTerm | |||
for bytes.Compare(next, t.endTerm) <= 0 { | |||
rv = append(rv, next) | |||
if filter != nil { | |||
if filter(next) { | |||
rv = append(rv, next) | |||
} | |||
} else { | |||
rv = append(rv, next) | |||
} | |||
next = incrementBytes(next) | |||
} | |||
return rv | |||
@@ -150,10 +185,10 @@ func incrementBytes(in []byte) []byte { | |||
type termRanges []*termRange | |||
func (tr termRanges) Enumerate() [][]byte { | |||
func (tr termRanges) Enumerate(filter filterFunc) [][]byte { | |||
var rv [][]byte | |||
for _, tri := range tr { | |||
trie := tri.Enumerate() | |||
trie := tri.Enumerate(filter) | |||
rv = append(rv, trie...) | |||
} | |||
return rv |
@@ -38,6 +38,8 @@ type SearchSort interface { | |||
RequiresScoring() bool | |||
RequiresFields() []string | |||
Reverse() | |||
Copy() SearchSort | |||
} | |||
@@ -293,6 +295,12 @@ func (so SortOrder) CacheDescending() []bool { | |||
return rv | |||
} | |||
func (so SortOrder) Reverse() { | |||
for _, soi := range so { | |||
soi.Reverse() | |||
} | |||
} | |||
// SortFieldType lets you control some internal sort behavior | |||
// normally leaving this to the zero-value of SortFieldAuto is fine | |||
type SortFieldType int | |||
@@ -492,6 +500,15 @@ func (s *SortField) Copy() SearchSort { | |||
return &rv | |||
} | |||
func (s *SortField) Reverse() { | |||
s.Desc = !s.Desc | |||
if s.Missing == SortFieldMissingFirst { | |||
s.Missing = SortFieldMissingLast | |||
} else { | |||
s.Missing = SortFieldMissingFirst | |||
} | |||
} | |||
// SortDocID will sort results by the document identifier | |||
type SortDocID struct { | |||
Desc bool | |||
@@ -533,6 +550,10 @@ func (s *SortDocID) Copy() SearchSort { | |||
return &rv | |||
} | |||
func (s *SortDocID) Reverse() { | |||
s.Desc = !s.Desc | |||
} | |||
// SortScore will sort results by the document match score | |||
type SortScore struct { | |||
Desc bool | |||
@@ -574,6 +595,10 @@ func (s *SortScore) Copy() SearchSort { | |||
return &rv | |||
} | |||
func (s *SortScore) Reverse() { | |||
s.Desc = !s.Desc | |||
} | |||
var maxDistance = string(numeric.MustNewPrefixCodedInt64(math.MaxInt64, 0)) | |||
// NewSortGeoDistance creates SearchSort instance for sorting documents by | |||
@@ -705,6 +730,10 @@ func (s *SortGeoDistance) Copy() SearchSort { | |||
return &rv | |||
} | |||
func (s *SortGeoDistance) Reverse() { | |||
s.Desc = !s.Desc | |||
} | |||
type BytesSlice [][]byte | |||
func (p BytesSlice) Len() int { return len(p) } |
@@ -1,10 +1,9 @@ | |||
language: go | |||
go: | |||
- 1.4 | |||
- 1.7 | |||
script: | |||
- go get golang.org/x/tools/cmd/vet | |||
- go get golang.org/x/tools/cmd/cover | |||
- go get github.com/mattn/goveralls | |||
- go test -v -covermode=count -coverprofile=profile.out |
@@ -18,7 +18,7 @@ import ( | |||
"bytes" | |||
) | |||
// Iterator represents a means of visity key/value pairs in order. | |||
// Iterator represents a means of visiting key/value pairs in order. | |||
type Iterator interface { | |||
// Current() returns the key/value pair currently pointed to. | |||
@@ -186,20 +186,29 @@ func (i *FSTIterator) Next() error { | |||
} | |||
func (i *FSTIterator) next(lastOffset int) error { | |||
// remember where we started | |||
// remember where we started with keysStack in this next() call | |||
i.nextStart = append(i.nextStart[:0], i.keysStack...) | |||
nextOffset := lastOffset + 1 | |||
allowCompare := false | |||
OUTER: | |||
for true { | |||
curr := i.statesStack[len(i.statesStack)-1] | |||
autCurr := i.autStatesStack[len(i.autStatesStack)-1] | |||
if curr.Final() && i.aut.IsMatch(autCurr) && | |||
bytes.Compare(i.keysStack, i.nextStart) > 0 { | |||
// in final state greater than start key | |||
return nil | |||
if curr.Final() && i.aut.IsMatch(autCurr) && allowCompare { | |||
// check to see if new keystack might have gone too far | |||
if i.endKeyExclusive != nil && | |||
bytes.Compare(i.keysStack, i.endKeyExclusive) >= 0 { | |||
return ErrIteratorDone | |||
} | |||
cmp := bytes.Compare(i.keysStack, i.nextStart) | |||
if cmp > 0 { | |||
// in final state greater than start key | |||
return nil | |||
} | |||
} | |||
numTrans := curr.NumTransitions() | |||
@@ -207,8 +216,12 @@ OUTER: | |||
INNER: | |||
for nextOffset < numTrans { | |||
t := curr.TransitionAt(nextOffset) | |||
autNext := i.aut.Accept(autCurr, t) | |||
if !i.aut.CanMatch(autNext) { | |||
// TODO: potential optimization to skip nextOffset | |||
// forwards more directly to something that the | |||
// automaton likes rather than a linear scan? | |||
nextOffset += 1 | |||
continue INNER | |||
} | |||
@@ -234,30 +247,41 @@ OUTER: | |||
i.valsStack = append(i.valsStack, v) | |||
i.autStatesStack = append(i.autStatesStack, autNext) | |||
// check to see if new keystack might have gone too far | |||
if i.endKeyExclusive != nil && | |||
bytes.Compare(i.keysStack, i.endKeyExclusive) >= 0 { | |||
return ErrIteratorDone | |||
} | |||
nextOffset = 0 | |||
allowCompare = true | |||
continue OUTER | |||
} | |||
// no more transitions, so need to backtrack and stack pop | |||
if len(i.statesStack) <= 1 { | |||
// stack len is 1 (root), can't go back further, we're done | |||
break | |||
} | |||
// no transitions, and still room to pop | |||
i.statesStack = i.statesStack[:len(i.statesStack)-1] | |||
i.keysStack = i.keysStack[:len(i.keysStack)-1] | |||
// if the top of the stack represents a linear chain of states | |||
// (i.e., a suffix of nodes linked by single transitions), | |||
// then optimize by popping the suffix in one shot without | |||
// going back all the way to the OUTER loop | |||
var popNum int | |||
for j := len(i.statesStack) - 1; j > 0; j-- { | |||
if i.statesStack[j].NumTransitions() != 1 { | |||
popNum = len(i.statesStack) - 1 - j | |||
break | |||
} | |||
} | |||
if popNum < 1 { // always pop at least 1 entry from the stacks | |||
popNum = 1 | |||
} | |||
nextOffset = i.keysPosStack[len(i.keysPosStack)-1] + 1 | |||
nextOffset = i.keysPosStack[len(i.keysPosStack)-popNum] + 1 | |||
allowCompare = false | |||
i.keysPosStack = i.keysPosStack[:len(i.keysPosStack)-1] | |||
i.valsStack = i.valsStack[:len(i.valsStack)-1] | |||
i.autStatesStack = i.autStatesStack[:len(i.autStatesStack)-1] | |||
i.statesStack = i.statesStack[:len(i.statesStack)-popNum] | |||
i.keysStack = i.keysStack[:len(i.keysStack)-popNum] | |||
i.keysPosStack = i.keysPosStack[:len(i.keysPosStack)-popNum] | |||
i.valsStack = i.valsStack[:len(i.valsStack)-popNum] | |||
i.autStatesStack = i.autStatesStack[:len(i.autStatesStack)-popNum] | |||
} | |||
return ErrIteratorDone |
@@ -0,0 +1,10 @@ | |||
module github.com/couchbase/vellum | |||
go 1.12 | |||
require ( | |||
github.com/edsrzf/mmap-go v1.0.0 | |||
github.com/spf13/cobra v0.0.5 | |||
github.com/willf/bitset v1.1.10 | |||
golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a // indirect | |||
) |
@@ -0,0 +1,39 @@ | |||
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= | |||
github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= | |||
github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= | |||
github.com/coreos/go-etcd v2.0.0+incompatible/go.mod h1:Jez6KQU2B/sWsbdaef3ED8NzMklzPG4d5KIOhIy30Tk= | |||
github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= | |||
github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE= | |||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= | |||
github.com/edsrzf/mmap-go v1.0.0 h1:CEBF7HpRnUCSJgGUb5h1Gm7e3VkmVDrR8lvWVLtrOFw= | |||
github.com/edsrzf/mmap-go v1.0.0/go.mod h1:YO35OhQPt3KJa3ryjFM5Bs14WD66h8eGKpfaBNrHW5M= | |||
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= | |||
github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= | |||
github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM= | |||
github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= | |||
github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= | |||
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= | |||
github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= | |||
github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= | |||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= | |||
github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g= | |||
github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ= | |||
github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= | |||
github.com/spf13/cobra v0.0.5 h1:f0B+LkLX6DtmRH1isoNA9VTtNUK9K8xYd28JNNfOv/s= | |||
github.com/spf13/cobra v0.0.5/go.mod h1:3K3wKZymM7VvHMDS9+Akkh4K60UwM26emMESw8tLCHU= | |||
github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo= | |||
github.com/spf13/pflag v1.0.3 h1:zPAT6CGy6wXeQ7NtTnaTerfKOsV6V6F8agHXFiazDkg= | |||
github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= | |||
github.com/spf13/viper v1.3.2/go.mod h1:ZiWeW+zYFKm7srdB9IoDzzZXaJaI5eL9QjNiN/DMA2s= | |||
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= | |||
github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0= | |||
github.com/willf/bitset v1.1.10 h1:NotGKqX0KwQ72NUzqrjZq5ipPNDQex9lo3WpaS8L2sc= | |||
github.com/willf/bitset v1.1.10/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4= | |||
github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q= | |||
golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= | |||
golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= | |||
golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a h1:aYOabOQFp6Vj6W1F80affTUvO9UxmJRx8K0gsfABByQ= | |||
golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= | |||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= | |||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= | |||
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= |
@@ -12,7 +12,7 @@ | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package levenshtein2 | |||
package levenshtein | |||
import ( | |||
"fmt" |
@@ -12,7 +12,7 @@ | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package levenshtein2 | |||
package levenshtein | |||
import ( | |||
"fmt" |
@@ -12,7 +12,7 @@ | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package levenshtein2 | |||
package levenshtein | |||
import "fmt" | |||
@@ -12,7 +12,7 @@ | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package levenshtein2 | |||
package levenshtein | |||
import ( | |||
"math" |
@@ -12,7 +12,7 @@ | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package levenshtein2 | |||
package levenshtein | |||
import ( | |||
"crypto/md5" |
@@ -75,15 +75,23 @@ func (c *compiler) c(ast *syntax.Regexp) (err error) { | |||
Rune0: [2]rune{r, r}, | |||
} | |||
next.Rune = next.Rune0[0:2] | |||
return c.c(&next) | |||
} | |||
c.sequences, c.rangeStack, err = utf8.NewSequencesPrealloc( | |||
r, r, c.sequences, c.rangeStack, c.startBytes, c.endBytes) | |||
if err != nil { | |||
return err | |||
} | |||
for _, seq := range c.sequences { | |||
c.compileUtf8Ranges(seq) | |||
// try to find more folded runes | |||
for r1 := unicode.SimpleFold(r); r1 != r; r1 = unicode.SimpleFold(r1) { | |||
next.Rune = append(next.Rune, r1, r1) | |||
} | |||
err = c.c(&next) | |||
if err != nil { | |||
return err | |||
} | |||
} else { | |||
c.sequences, c.rangeStack, err = utf8.NewSequencesPrealloc( | |||
r, r, c.sequences, c.rangeStack, c.startBytes, c.endBytes) | |||
if err != nil { | |||
return err | |||
} | |||
for _, seq := range c.sequences { | |||
c.compileUtf8Ranges(seq) | |||
} | |||
} | |||
} | |||
case syntax.OpAnyChar: |
@@ -0,0 +1,12 @@ | |||
// +build riscv64 | |||
package bbolt | |||
// maxMapSize represents the largest mmap size supported by Bolt. | |||
const maxMapSize = 0xFFFFFFFFFFFF // 256TB | |||
// maxAllocSize is the size used when creating array pointers. | |||
const maxAllocSize = 0x7FFFFFFF | |||
// Are unaligned load/stores broken on this arch? | |||
var brokenUnaligned = true |
@@ -121,6 +121,7 @@ type DB struct { | |||
AllocSize int | |||
path string | |||
openFile func(string, int, os.FileMode) (*os.File, error) | |||
file *os.File | |||
dataref []byte // mmap'ed readonly, write throws SEGV | |||
data *[maxMapSize]byte | |||
@@ -199,10 +200,15 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) { | |||
db.readOnly = true | |||
} | |||
db.openFile = options.OpenFile | |||
if db.openFile == nil { | |||
db.openFile = os.OpenFile | |||
} | |||
// Open data file and separate sync handler for metadata writes. | |||
db.path = path | |||
var err error | |||
if db.file, err = os.OpenFile(db.path, flag|os.O_CREATE, mode); err != nil { | |||
if db.file, err = db.openFile(db.path, flag|os.O_CREATE, mode); err != nil { | |||
_ = db.close() | |||
return nil, err | |||
} | |||
@@ -1054,6 +1060,10 @@ type Options struct { | |||
// set directly on the DB itself when returned from Open(), but this option | |||
// is useful in APIs which expose Options but not the underlying DB. | |||
NoSync bool | |||
// OpenFile is used to open files. It defaults to os.OpenFile. This option | |||
// is useful for writing hermetic tests. | |||
OpenFile func(string, int, os.FileMode) (*os.File, error) | |||
} | |||
// DefaultOptions represent the options used if nil options are passed into Open(). |
@@ -349,6 +349,28 @@ func (f *freelist) reload(p *page) { | |||
f.readIDs(a) | |||
} | |||
// noSyncReload reads the freelist from pgids and filters out pending items. | |||
func (f *freelist) noSyncReload(pgids []pgid) { | |||
// Build a cache of only pending pages. | |||
pcache := make(map[pgid]bool) | |||
for _, txp := range f.pending { | |||
for _, pendingID := range txp.ids { | |||
pcache[pendingID] = true | |||
} | |||
} | |||
// Check each page in the freelist and build a new available freelist | |||
// with any pages not in the pending lists. | |||
var a []pgid | |||
for _, id := range pgids { | |||
if !pcache[id] { | |||
a = append(a, id) | |||
} | |||
} | |||
f.readIDs(a) | |||
} | |||
// reindex rebuilds the free cache based on available and pending free lists. | |||
func (f *freelist) reindex() { | |||
ids := f.getFreePageIDs() |
@@ -254,17 +254,36 @@ func (tx *Tx) Rollback() error { | |||
if tx.db == nil { | |||
return ErrTxClosed | |||
} | |||
tx.rollback() | |||
tx.nonPhysicalRollback() | |||
return nil | |||
} | |||
// nonPhysicalRollback is called when user calls Rollback directly, in this case we do not need to reload the free pages from disk. | |||
func (tx *Tx) nonPhysicalRollback() { | |||
if tx.db == nil { | |||
return | |||
} | |||
if tx.writable { | |||
tx.db.freelist.rollback(tx.meta.txid) | |||
} | |||
tx.close() | |||
} | |||
// rollback needs to reload the free pages from disk in case some system error happens like fsync error. | |||
func (tx *Tx) rollback() { | |||
if tx.db == nil { | |||
return | |||
} | |||
if tx.writable { | |||
tx.db.freelist.rollback(tx.meta.txid) | |||
tx.db.freelist.reload(tx.db.page(tx.db.meta().freelist)) | |||
if !tx.db.hasSyncedFreelist() { | |||
// Reconstruct free page list by scanning the DB to get the whole free page list. | |||
// Note: scanning the whole db is heavy if your db size is large in NoSyncFreeList mode. | |||
tx.db.freelist.noSyncReload(tx.db.freepages()) | |||
} else { | |||
// Read free page list from freelist page. | |||
tx.db.freelist.reload(tx.db.page(tx.db.meta().freelist)) | |||
} | |||
} | |||
tx.close() | |||
} | |||
@@ -315,7 +334,7 @@ func (tx *Tx) Copy(w io.Writer) error { | |||
// If err == nil then exactly tx.Size() bytes will be written into the writer. | |||
func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) { | |||
// Attempt to open reader with WriteFlag | |||
f, err := os.OpenFile(tx.db.path, os.O_RDONLY|tx.WriteFlag, 0) | |||
f, err := tx.db.openFile(tx.db.path, os.O_RDONLY|tx.WriteFlag, 0) | |||
if err != nil { | |||
return 0, err | |||
} | |||
@@ -369,7 +388,7 @@ func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) { | |||
// A reader transaction is maintained during the copy so it is safe to continue | |||
// using the database while a copy is in progress. | |||
func (tx *Tx) CopyFile(path string, mode os.FileMode) error { | |||
f, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE|os.O_TRUNC, mode) | |||
f, err := tx.db.openFile(path, os.O_RDWR|os.O_CREATE|os.O_TRUNC, mode) | |||
if err != nil { | |||
return err | |||
} |
@@ -1,3 +1,5 @@ | |||
The MIT license. | |||
Copyright (c) 2014 the go-unsnap-stream authors. | |||
Permission is hereby granted, free of charge, to any person obtaining a copy of | |||
@@ -7,6 +9,9 @@ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of | |||
the Software, and to permit persons to whom the Software is furnished to do so, | |||
subject to the following conditions: | |||
The above copyright notice and this permission notice shall be included in all | |||
copies or substantial portions of the Software. | |||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS | |||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR | |||
@@ -14,5 +19,3 @@ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER | |||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | |||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
Permission is explicitly granted to relicense this material under new terms of | |||
your choice when integrating this library with another library or project. |
@@ -7,7 +7,9 @@ Note that the *streaming or framing format* for snappy is different from snappy | |||
Strangely, though the streaming format was first proposed in Go[1][2], it was never updated, and I could not locate any other library for Go that would handle the streaming/framed snappy format. Hence this implementation of the spec. There is a command line tool[3] that has a C implementation, but this is the only Go implementation that I am aware of. The reference for the framing/streaming spec seems to be the python implementation[4]. | |||
For binary compatibility with the python implementation, one could use the C-snappy compressor/decompressor code directly; using github.com/dgryski/go-csnappy. In fact we did this for a while to verify byte-for-byte compatibility, as the native Go implementation produces slightly different binary compression (still conformant with the standard of course), which made test-diffs harder, and some have complained about it being slower than the C. | |||
Update to the previous paragraph: Hooray! Good news: Thanks to @nigeltao, we have since learned that the [github.com/golang/snappy](https://github.com/golang/snappy) package now provides the snappy streaming format too. Even though the type level descriptions are a little misleading because they don't mention that they are for the stream format, the [snappy package header documentation](https://godoc.org/github.com/golang/snappy) points out that the [snappy.Reader](https://godoc.org/github.com/golang/snappy#Reader) and [snappy.Writer](https://godoc.org/github.com/golang/snappy#Writer) types do indeed provide stream (vs block) handling. Although I have not benchmarked, you should probably prefer that package as it will likely be maintained more than I have time to devote, and also perhaps better integrated with the underlying snappy as they share the same repo. | |||
For binary compatibility with the [python implementation](https://pypi.python.org/pypi/python-snappy) in [4], one could use the C-snappy compressor/decompressor code directly; using github.com/dgryski/go-csnappy. In fact we did this for a while to verify byte-for-byte compatibility, as the native Go implementation produces slightly different binary compression (still conformant with the standard of course), which made test-diffs harder, and some have complained about it being slower than the C. | |||
However, while the c-snappy was useful for checking compatibility, it introduced dependencies on external C libraries (both the c-snappy library and the C standard library). Our go binary executable that used the go-unsnap-stream library was no longer standalone, and deployment was painful if not impossible if the target had a different C standard library. So we've gone back to using the snappy-go implementation (entirely in Go) for ease of deployment. See the comments at the top of unsnap.go if you wish to use c-snappy instead. | |||
@@ -17,4 +19,4 @@ However, while the c-snappy was useful for checking compatibility, it introduced | |||
[3] https://github.com/kubo/snzip | |||
[4] https://pypi.python.org/pypi/python-snappy | |||
[4] https://pypi.python.org/pypi/python-snappy |
@@ -7,6 +7,7 @@ import ( | |||
"io" | |||
"io/ioutil" | |||
"os" | |||
"strings" | |||
"hash/crc32" | |||
@@ -189,7 +190,12 @@ func UnsnapOneFrame(r io.Reader, encBuf *FixedSizeRingBuf, outDecodedBuf *FixedS | |||
err = nil | |||
} | |||
} else { | |||
panic(err) | |||
// may be an odd already closed... don't panic on that | |||
if strings.Contains(err.Error(), "file already closed") { | |||
err = nil | |||
} else { | |||
panic(err) | |||
} | |||
} | |||
} | |||
@@ -5,6 +5,8 @@ import ( | |||
"reflect" | |||
) | |||
const resumableDefault = false | |||
var ( | |||
// ErrShortBytes is returned when the | |||
// slice being decoded is too short to | |||
@@ -26,99 +28,240 @@ type Error interface { | |||
// Resumable returns whether | |||
// or not the error means that | |||
// the stream of data is malformed | |||
// and the information is unrecoverable. | |||
// and the information is unrecoverable. | |||
Resumable() bool | |||
} | |||
// contextError allows msgp Error instances to be enhanced with additional | |||
// context about their origin. | |||
type contextError interface { | |||
Error | |||
// withContext must not modify the error instance - it must clone and | |||
// return a new error with the context added. | |||
withContext(ctx string) error | |||
} | |||
// Cause returns the underlying cause of an error that has been wrapped | |||
// with additional context. | |||
func Cause(e error) error { | |||
out := e | |||
if e, ok := e.(errWrapped); ok && e.cause != nil { | |||
out = e.cause | |||
} | |||
return out | |||
} | |||
// Resumable returns whether or not the error means that the stream of data is | |||
// malformed and the information is unrecoverable. | |||
func Resumable(e error) bool { | |||
if e, ok := e.(Error); ok { | |||
return e.Resumable() | |||
} | |||
return resumableDefault | |||
} | |||
// WrapError wraps an error with additional context that allows the part of the | |||
// serialized type that caused the problem to be identified. Underlying errors | |||
// can be retrieved using Cause() | |||
// | |||
// The input error is not modified - a new error should be returned. | |||
// | |||
// ErrShortBytes is not wrapped with any context due to backward compatibility | |||
// issues with the public API. | |||
// | |||
func WrapError(err error, ctx ...interface{}) error { | |||
switch e := err.(type) { | |||
case errShort: | |||
return e | |||
case contextError: | |||
return e.withContext(ctxString(ctx)) | |||
default: | |||
return errWrapped{cause: err, ctx: ctxString(ctx)} | |||
} | |||
} | |||
// ctxString renders each element of ctx with fmt's default formatting and
// joins the pieces with "/". An empty or nil slice yields "".
func ctxString(ctx []interface{}) string {
	var joined string
	for i := range ctx {
		if i != 0 {
			joined += "/"
		}
		joined += fmt.Sprint(ctx[i])
	}
	return joined
}
// addCtx prepends add to an existing context string, separating the two with
// "/". When ctx is empty, add is returned on its own.
func addCtx(ctx, add string) string {
	if ctx == "" {
		return add
	}
	return add + "/" + ctx
}
// errWrapped pairs an arbitrary error handed to WrapError with a context
// string; the original error can be retrieved again with Cause().
type errWrapped struct {
	cause error
	ctx   string
}

// Error returns the cause's message, followed by " at <ctx>" when a context
// has been recorded.
func (e errWrapped) Error() string {
	if e.ctx == "" {
		return e.cause.Error()
	}
	return fmt.Sprintf("%s at %s", e.cause, e.ctx)
}
func (e errWrapped) Resumable() bool { | |||
if e, ok := e.cause.(Error); ok { | |||
return e.Resumable() | |||
} | |||
return resumableDefault | |||
} | |||
type errShort struct{} | |||
func (e errShort) Error() string { return "msgp: too few bytes left to read object" } | |||
func (e errShort) Resumable() bool { return false } | |||
type errFatal struct{} | |||
type errFatal struct { | |||
ctx string | |||
} | |||
func (f errFatal) Error() string { | |||
out := "msgp: fatal decoding error (unreachable code)" | |||
if f.ctx != "" { | |||
out += " at " + f.ctx | |||
} | |||
return out | |||
} | |||
func (f errFatal) Error() string { return "msgp: fatal decoding error (unreachable code)" } | |||
func (f errFatal) Resumable() bool { return false } | |||
func (f errFatal) withContext(ctx string) error { f.ctx = addCtx(f.ctx, ctx); return f } | |||
// ArrayError is an error returned | |||
// when decoding a fix-sized array | |||
// of the wrong size | |||
type ArrayError struct { | |||
Wanted uint32 | |||
Got uint32 | |||
ctx string | |||
} | |||
// Error implements the error interface | |||
func (a ArrayError) Error() string { | |||
return fmt.Sprintf("msgp: wanted array of size %d; got %d", a.Wanted, a.Got) | |||
out := fmt.Sprintf("msgp: wanted array of size %d; got %d", a.Wanted, a.Got) | |||
if a.ctx != "" { | |||
out += " at " + a.ctx | |||
} | |||
return out | |||
} | |||
// Resumable is always 'true' for ArrayErrors | |||
func (a ArrayError) Resumable() bool { return true } | |||
func (a ArrayError) withContext(ctx string) error { a.ctx = addCtx(a.ctx, ctx); return a } | |||
// IntOverflow is returned when a call | |||
// would downcast an integer to a type | |||
// with too few bits to hold its value. | |||
type IntOverflow struct { | |||
Value int64 // the value of the integer | |||
FailedBitsize int // the bit size that the int64 could not fit into | |||
ctx string | |||
} | |||
// Error implements the error interface | |||
func (i IntOverflow) Error() string { | |||
return fmt.Sprintf("msgp: %d overflows int%d", i.Value, i.FailedBitsize) | |||
str := fmt.Sprintf("msgp: %d overflows int%d", i.Value, i.FailedBitsize) | |||
if i.ctx != "" { | |||
str += " at " + i.ctx | |||
} | |||
return str | |||
} | |||
// Resumable is always 'true' for overflows | |||
func (i IntOverflow) Resumable() bool { return true } | |||
func (i IntOverflow) withContext(ctx string) error { i.ctx = addCtx(i.ctx, ctx); return i } | |||
// UintOverflow is returned when a call | |||
// would downcast an unsigned integer to a type | |||
// with too few bits to hold its value | |||
type UintOverflow struct { | |||
Value uint64 // value of the uint | |||
FailedBitsize int // the bit size that couldn't fit the value | |||
ctx string | |||
} | |||
// Error implements the error interface | |||
func (u UintOverflow) Error() string { | |||
return fmt.Sprintf("msgp: %d overflows uint%d", u.Value, u.FailedBitsize) | |||
str := fmt.Sprintf("msgp: %d overflows uint%d", u.Value, u.FailedBitsize) | |||
if u.ctx != "" { | |||
str += " at " + u.ctx | |||
} | |||
return str | |||
} | |||
// Resumable is always 'true' for overflows | |||
func (u UintOverflow) Resumable() bool { return true } | |||
func (u UintOverflow) withContext(ctx string) error { u.ctx = addCtx(u.ctx, ctx); return u } | |||
// UintBelowZero is returned when a call | |||
// would cast a signed integer below zero | |||
// to an unsigned integer. | |||
type UintBelowZero struct { | |||
Value int64 // value of the incoming int | |||
ctx string | |||
} | |||
// Error implements the error interface | |||
func (u UintBelowZero) Error() string { | |||
return fmt.Sprintf("msgp: attempted to cast int %d to unsigned", u.Value) | |||
str := fmt.Sprintf("msgp: attempted to cast int %d to unsigned", u.Value) | |||
if u.ctx != "" { | |||
str += " at " + u.ctx | |||
} | |||
return str | |||
} | |||
// Resumable is always 'true' for overflows | |||
func (u UintBelowZero) Resumable() bool { return true } | |||
func (u UintBelowZero) withContext(ctx string) error { | |||
u.ctx = ctx | |||
return u | |||
} | |||
// A TypeError is returned when a particular | |||
// decoding method is unsuitable for decoding | |||
// a particular MessagePack value. | |||
type TypeError struct { | |||
Method Type // Type expected by method | |||
Encoded Type // Type actually encoded | |||
ctx string | |||
} | |||
// Error implements the error interface | |||
func (t TypeError) Error() string { | |||
return fmt.Sprintf("msgp: attempted to decode type %q with method for %q", t.Encoded, t.Method) | |||
out := fmt.Sprintf("msgp: attempted to decode type %q with method for %q", t.Encoded, t.Method) | |||
if t.ctx != "" { | |||
out += " at " + t.ctx | |||
} | |||
return out | |||
} | |||
// Resumable returns 'true' for TypeErrors | |||
func (t TypeError) Resumable() bool { return true } | |||
func (t TypeError) withContext(ctx string) error { t.ctx = addCtx(t.ctx, ctx); return t } | |||
// returns either InvalidPrefixError or | |||
// TypeError depending on whether or not | |||
// the prefix is recognized | |||
@@ -148,10 +291,24 @@ func (i InvalidPrefixError) Resumable() bool { return false } | |||
// to a function that takes `interface{}`. | |||
type ErrUnsupportedType struct { | |||
T reflect.Type | |||
ctx string | |||
} | |||
// Error implements error | |||
func (e *ErrUnsupportedType) Error() string { return fmt.Sprintf("msgp: type %q not supported", e.T) } | |||
func (e *ErrUnsupportedType) Error() string { | |||
out := fmt.Sprintf("msgp: type %q not supported", e.T) | |||
if e.ctx != "" { | |||
out += " at " + e.ctx | |||
} | |||
return out | |||
} | |||
// Resumable returns 'true' for ErrUnsupportedType | |||
func (e *ErrUnsupportedType) Resumable() bool { return true } | |||
func (e *ErrUnsupportedType) withContext(ctx string) error { | |||
o := *e | |||
o.ctx = addCtx(o.ctx, ctx) | |||
return &o | |||
} |