From ab5a147d5b218ffad14fa979e6313a82e5f12815 Mon Sep 17 00:00:00 2001 From: Alok Kumar Singh Date: Sun, 9 Aug 2020 16:00:02 +0530 Subject: [PATCH] Upgraded sarama to master because we need sync commit support https://github.com/Shopify/sarama/pull/1699 --- redshiftsink/go.mod | 2 +- redshiftsink/go.sum | 52 +- redshiftsink/pkg/consumer/client.go | 4 + redshiftsink/pkg/consumer/kafkago.go | 3 - .../github.com/Shopify/sarama/.gitignore | 2 + .../github.com/Shopify/sarama/.golangci.yml | 5 +- .../vendor/github.com/Shopify/sarama/Makefile | 6 +- .../github.com/Shopify/sarama/README.md | 2 +- .../vendor/github.com/Shopify/sarama/admin.go | 16 +- .../Shopify/sarama/async_producer.go | 4 + .../Shopify/sarama/balance_strategy.go | 12 +- .../github.com/Shopify/sarama/config.go | 16 + .../github.com/Shopify/sarama/consumer.go | 11 +- .../Shopify/sarama/consumer_group.go | 23 +- .../vendor/github.com/Shopify/sarama/dev.yml | 2 +- .../Shopify/sarama/docker-compose.yml | 134 +++ .../vendor/github.com/Shopify/sarama/go.mod | 25 +- .../vendor/github.com/Shopify/sarama/go.sum | 53 +- .../Shopify/sarama/gssapi_kerberos.go | 3 +- .../github.com/Shopify/sarama/interceptors.go | 43 + .../Shopify/sarama/kerberos_client.go | 15 +- .../Shopify/sarama/offset_manager.go | 27 +- .../vendor/github.com/Shopify/sarama/utils.go | 4 +- .../klauspost/compress/fse/bitreader.go | 27 +- .../klauspost/compress/fse/bytereader.go | 13 +- .../github.com/klauspost/compress/fse/fse.go | 37 +- .../klauspost/compress/huff0/README.md | 6 +- .../klauspost/compress/huff0/bitreader.go | 256 ++++- .../klauspost/compress/huff0/decompress.go | 904 +++++++++++++++--- .../klauspost/compress/huff0/huff0.go | 13 +- .../klauspost/compress/zstd/README.md | 214 +++-- .../klauspost/compress/zstd/bitreader.go | 25 +- .../klauspost/compress/zstd/blockdec.go | 73 +- .../klauspost/compress/zstd/blockenc.go | 12 +- .../klauspost/compress/zstd/bytebuf.go | 4 +- .../klauspost/compress/zstd/bytereader.go | 18 +- .../klauspost/compress/zstd/decoder.go | 55 +- .../compress/zstd/decoder_options.go | 16 + .../klauspost/compress/zstd/dict.go | 104 ++ .../klauspost/compress/zstd/enc_better.go | 518 ++++++++++ .../klauspost/compress/zstd/enc_dfast.go | 112 +-- .../klauspost/compress/zstd/enc_fast.go | 227 +++-- .../klauspost/compress/zstd/enc_params.go | 3 + .../klauspost/compress/zstd/encoder.go | 91 +- .../compress/zstd/encoder_options.go | 76 +- .../klauspost/compress/zstd/framedec.go | 55 +- .../klauspost/compress/zstd/fse_decoder.go | 23 +- .../klauspost/compress/zstd/fse_encoder.go | 8 +- .../klauspost/compress/zstd/history.go | 18 +- .../zstd/internal/xxhash/xxhash_amd64.s | 8 +- .../klauspost/compress/zstd/seqdec.go | 143 ++- .../klauspost/compress/zstd/snappy.go | 2 +- .../klauspost/compress/zstd/zstd.go | 42 +- .../vendor/github.com/pierrec/lz4/block.go | 70 +- .../vendor/github.com/pierrec/lz4/lz4.go | 2 +- .../rcrowley/go-metrics/.travis.yml | 2 + redshiftsink/vendor/modules.txt | 12 +- 57 files changed, 2895 insertions(+), 758 deletions(-) delete mode 100644 redshiftsink/pkg/consumer/kafkago.go create mode 100644 redshiftsink/vendor/github.com/Shopify/sarama/docker-compose.yml create mode 100644 redshiftsink/vendor/github.com/Shopify/sarama/interceptors.go create mode 100644 redshiftsink/vendor/github.com/klauspost/compress/zstd/dict.go create mode 100644 redshiftsink/vendor/github.com/klauspost/compress/zstd/enc_better.go diff --git a/redshiftsink/go.mod b/redshiftsink/go.mod index 4ee580645..65b197a37 100644 --- a/redshiftsink/go.mod +++
b/redshiftsink/go.mod @@ -3,7 +3,7 @@ module github.com/practo/tipoca-stream/redshiftbatcher go 1.14 require ( - github.com/Shopify/sarama v1.26.4 + github.com/Shopify/sarama v1.26.5-0.20200805062841-c4bb5cd3a290 github.com/practo/klog/v2 v2.2.1 github.com/spf13/pflag v1.0.5 ) diff --git a/redshiftsink/go.sum b/redshiftsink/go.sum index aa795770a..3a88a17c6 100644 --- a/redshiftsink/go.sum +++ b/redshiftsink/go.sum @@ -1,7 +1,8 @@ -github.com/Shopify/sarama v1.26.4 h1:+17TxUq/PJEAfZAll0T7XJjSgQWCpaQSoki/x5yN8o8= -github.com/Shopify/sarama v1.26.4/go.mod h1:NbSGBSSndYaIhRcBtY9V0U7AyH+x71bG668AuWys/yU= +github.com/Shopify/sarama v1.26.5-0.20200805062841-c4bb5cd3a290 h1:uFCcXU096rrCzh7aXtYnyIkbnRx0IvQWYU/WDmrYlbQ= +github.com/Shopify/sarama v1.26.5-0.20200805062841-c4bb5cd3a290/go.mod h1:aCdj6ymI8uyPEux1JJ9gcaDT6cinjGhNCAhs54taSUo= github.com/Shopify/toxiproxy v2.1.4+incompatible h1:TKdv8HiTLgE5wdJuEML90aBgNWsokNbMijUGhmcoBJc= github.com/Shopify/toxiproxy v2.1.4+incompatible/go.mod h1:OXgGpZ6Cli1/URJOF1DMxUHB2q5Ap20/P/eIdh4G0pI= +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -13,58 +14,63 @@ github.com/eapache/queue v1.1.0 h1:YOEu7KNc61ntiQlcEeUIoDTJ2o8mQznoNvUhiigpIqc= github.com/eapache/queue v1.1.0/go.mod h1:6eCeP0CKFpHLu8blIFXhExK/dRa7WDZfr6jVFPTqq+I= github.com/fortytw2/leaktest v1.3.0 h1:u8491cBMTQ8ft8aeV+adlcytMZylmA5nnwwkRZjI8vw= github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g= -github.com/frankban/quicktest v1.7.2 h1:2QxQoC1TS09S7fhCPsrvqYdvP1H5M1P1ih5ABm3BTYk= -github.com/frankban/quicktest v1.7.2/go.mod h1:jaStnuzAqU1AJdCO0l53JDCJrVDKcS03DbaAcR7Ks/o= +github.com/frankban/quicktest v1.10.0 h1:Gfh+GAJZOAoKZsIZeZbdn2JF10kN1XHNvjsvQK8gVkE= +github.com/frankban/quicktest v1.10.0/go.mod h1:ui7WezCLWMWxVWr1GETZY3smRy0G4KWq9vcPtJmFl7Y= github.com/go-logr/logr v0.2.0 h1:QvGt2nLcHH0WK9orKa+ppBPAxREcH364nPUedEpK0TY= github.com/go-logr/logr v0.2.0/go.mod h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTgseGU= github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4= github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= -github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.4.0 h1:xsAVV57WRhGj6kEIi8ReJzQlHHqcBYCElAvkovg3B/4= github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.4.1 h1:/exdXoGamhu5ONeUJH0deniYLWYvQwW66yvlfiiKTu0= +github.com/google/go-cmp v0.4.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/hashicorp/go-uuid v1.0.2 h1:cfejS+Tpcp13yd5nYHWDI6qVCny6wyX2Mt5SGur2IGE= github.com/hashicorp/go-uuid v1.0.2/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= github.com/jcmturner/gofork v1.0.0 h1:J7uCkflzTEhUZ64xqKnkDxq3kzc96ajM1Gli5ktUem8= github.com/jcmturner/gofork v1.0.0/go.mod h1:MK8+TM0La+2rjBD4jE12Kj1pCCxK7d2LK/UM3ncEo0o= -github.com/klauspost/compress v1.9.8 h1:VMAMUUOh+gaxKTMk+zqbjsSjsIcUcL/LF4o63i82QyA= -github.com/klauspost/compress v1.9.8/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= -github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/klauspost/compress v1.10.10 
h1:a/y8CglcM7gLGYmlbP/stPE5sR3hbhFRUjCBfd/0B3I= +github.com/klauspost/compress v1.10.10/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= github.com/kr/pretty v0.2.0 h1:s5hAObm+yFO5uHYt5dYjxi2rXrsnmRpJx4OYvIWUaQs= github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= -github.com/pierrec/lz4 v2.4.1+incompatible h1:mFe7ttWaflA46Mhqh+jUfjp2qTbPYxLB2/OyBppH9dg= -github.com/pierrec/lz4 v2.4.1+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs= +github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= +github.com/pierrec/lz4 v2.5.2+incompatible h1:WCjObylUIOlKy/+7Abdn34TLIkXiA4UWUMhxq9m9ZXI= +github.com/pierrec/lz4 v2.5.2+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/practo/klog/v2 v2.2.1 h1:yV/SavG73KHSmCVwtDap1RGe+2jd86e1eugT+Mv2/FA= github.com/practo/klog/v2 v2.2.1/go.mod h1:WMkpfPwTxLgSLookpI4UUv2zL7tMltKACK/FHB1zLV0= -github.com/rcrowley/go-metrics v0.0.0-20190826022208-cac0b30c2563 h1:dY6ETXrvDG7Sa4vE8ZQG4yqWg6UnOcbqTAahkV813vQ= -github.com/rcrowley/go-metrics v0.0.0-20190826022208-cac0b30c2563/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4= +github.com/rcrowley/go-metrics v0.0.0-20200313005456-10cdbea86bc0 h1:MkV+77GLUNo5oJ0jf870itWm3D0Sjh7+Za9gazKc5LQ= +github.com/rcrowley/go-metrics v0.0.0-20200313005456-10cdbea86bc0/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk= -github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +github.com/stretchr/testify v1.6.0 h1:jlIyCplCJFULU/01vCkhKuTyc3OorI3bJFuw6obfgho= +github.com/stretchr/testify v1.6.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/xdg/scram v0.0.0-20180814205039-7eeb5667e42c/go.mod h1:lB8K/P019DLNhemzwFU4jHLhdvlE6uDZjXFejJXr49I= github.com/xdg/stringprep v1.0.0/go.mod h1:Jhud4/sHMO4oL310DaZAKk9ZaJ08SJfe+sJh0HrGL1Y= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.0.0-20200204104054-c9f3fb736b72 h1:+ELyKg6m8UBf0nPFSqD0mi7zUfwPyXo23HNjMnXPz7w= -golang.org/x/crypto v0.0.0-20200204104054-c9f3fb736b72/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20200510223506-06a226fb4e37 h1:cg5LA/zNPRzIXIWSCxQW10Rvpy94aQh3LT/ShoCpkHw= +golang.org/x/crypto v0.0.0-20200510223506-06a226fb4e37/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod 
h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20200202094626-16171245cfb2 h1:CCH4IOTTfewWjGOlSp+zGcjutRKlBEZQ6wTn8ozI/nI= -golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200528225125-3c3fba18258b h1:IYiJPiJfzktmDAO1HQiwjMjwjlYKHAL7KzeD544RJPs= +golang.org/x/net v0.0.0-20200528225125-3c3fba18258b/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo= -gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f h1:BLraFXnmrev5lT+xlilqcH8XK9/i0At2xKjWk4p6zsU= +gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/jcmturner/aescts.v1 v1.0.1 h1:cVVZBK2b1zY26haWB4vbBiZrfFQnfbTVrE3xZq6hrEw= gopkg.in/jcmturner/aescts.v1 v1.0.1/go.mod h1:nsR8qBOg+OucoIW+WMhB3GspUQXq9XorLnQb9XtvcOo= gopkg.in/jcmturner/dnsutils.v1 v1.0.1 h1:cIuC1OLRGZrld+16ZJvvZxVJeKPsvd5eUIvxfoN5hSM= @@ -75,6 +81,6 @@ gopkg.in/jcmturner/gokrb5.v7 v7.5.0 h1:a9tsXlIDD9SKxotJMK3niV7rPZAJeX2aD/0yg3qlI gopkg.in/jcmturner/gokrb5.v7 v7.5.0/go.mod h1:l8VISx+WGYp+Fp7KRbsiUuXTTOnxIc3Tuvyavf11/WM= gopkg.in/jcmturner/rpc.v1 v1.1.0 h1:QHIUxTX1ISuAv9dD2wJ9HWQVuWDX/Zc0PfeC2tjc4rU= gopkg.in/jcmturner/rpc.v1 v1.1.0/go.mod h1:YIdkC4XfD6GXbzje11McwsDuOlZQSb9W4vfLvuNnlv8= -gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10= -gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.0-20200601152816-913338de1bd2 h1:VEmvx0P+GVTgkNu2EdTN988YCZPcD3lo9AoczZpucwc= +gopkg.in/yaml.v3 v3.0.0-20200601152816-913338de1bd2/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/redshiftsink/pkg/consumer/client.go b/redshiftsink/pkg/consumer/client.go index 61f09d56d..452708ec6 100644 --- a/redshiftsink/pkg/consumer/client.go +++ b/redshiftsink/pkg/consumer/client.go @@ -81,6 +81,10 @@ func NewSaramaClient( c.Consumer.Offsets.Initial = sarama.OffsetOldest } + // disable auto commits of offsets + // https://github.com/Shopify/sarama/issues/1570#issuecomment-574908417 + c.Consumer.Offsets.AutoCommit.Enable = false + brokers := strings.Split(brokerURLs, ",") consumerGroup, err := sarama.NewConsumerGroup(brokers, group, c) diff --git 
a/redshiftsink/pkg/consumer/kafkago.go b/redshiftsink/pkg/consumer/kafkago.go deleted file mode 100644 index a4c58ad9e..000000000 --- a/redshiftsink/pkg/consumer/kafkago.go +++ /dev/null @@ -1,3 +0,0 @@ -package consumer - -// diff --git a/redshiftsink/vendor/github.com/Shopify/sarama/.gitignore b/redshiftsink/vendor/github.com/Shopify/sarama/.gitignore index 6e362e4f2..2c9adc20b 100644 --- a/redshiftsink/vendor/github.com/Shopify/sarama/.gitignore +++ b/redshiftsink/vendor/github.com/Shopify/sarama/.gitignore @@ -25,3 +25,5 @@ _testmain.go coverage.txt profile.out + +simplest-uncommitted-msg-0.1-jar-with-dependencies.jar diff --git a/redshiftsink/vendor/github.com/Shopify/sarama/.golangci.yml b/redshiftsink/vendor/github.com/Shopify/sarama/.golangci.yml index 47624f3de..ce2b5230d 100644 --- a/redshiftsink/vendor/github.com/Shopify/sarama/.golangci.yml +++ b/redshiftsink/vendor/github.com/Shopify/sarama/.golangci.yml @@ -66,9 +66,12 @@ linters: - varcheck - whitespace # - goconst - # - gochecknoinits + - gochecknoinits issues: exclude: - consider giving a name to these results - include an explanation for nolint directive + - Potential Integer overflow made by strconv.Atoi result conversion to int16/32 + - Use of weak random number generator + - TLS MinVersion too low diff --git a/redshiftsink/vendor/github.com/Shopify/sarama/Makefile b/redshiftsink/vendor/github.com/Shopify/sarama/Makefile index c3b431a56..a05863480 100644 --- a/redshiftsink/vendor/github.com/Shopify/sarama/Makefile +++ b/redshiftsink/vendor/github.com/Shopify/sarama/Makefile @@ -21,7 +21,11 @@ fmt: gofmt -s -l -w $(FILES) $(TESTS) lint: - golangci-lint run + GOFLAGS="-tags=functional" golangci-lint run test: $(GOTEST) ./... + +.PHONY: test_functional +test_functional: + $(GOTEST) -tags=functional ./... diff --git a/redshiftsink/vendor/github.com/Shopify/sarama/README.md b/redshiftsink/vendor/github.com/Shopify/sarama/README.md index 9b7478d7c..38d39695b 100644 --- a/redshiftsink/vendor/github.com/Shopify/sarama/README.md +++ b/redshiftsink/vendor/github.com/Shopify/sarama/README.md @@ -20,7 +20,7 @@ You might also want to look at the [Frequently Asked Questions](https://github.c Sarama provides a "2 releases + 2 months" compatibility guarantee: we support the two latest stable releases of Kafka and Go, and we provide a two month grace period for older releases. This means we currently officially support -Go 1.12 through 1.14, and Kafka 2.1 through 2.4, although older releases are +Go 1.13 through 1.14, and Kafka 2.4 through 2.6, although older releases are still likely to work. Sarama follows semantic versioning and provides API stability via the gopkg.in service. 
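The headline of this upgrade is manual offset committing: the client.go hunk above disables auto-commit (c.Consumer.Offsets.AutoCommit.Enable = false), and the vendored sarama adds a blocking Commit() to ConsumerGroupSession and OffsetManager (see the consumer_group.go and offset_manager.go hunks further down). A minimal sketch of how a consumer pairs the two; the handler, broker address, group, topic and batch size below are illustrative assumptions, not taken from this repository:

package main

import (
	"context"
	"log"

	"github.com/Shopify/sarama"
)

// batchHandler marks every message and commits the marked offsets
// synchronously after each batch. All names here are hypothetical.
type batchHandler struct {
	batchSize int
	seen      int
}

func (h *batchHandler) Setup(sarama.ConsumerGroupSession) error   { return nil }
func (h *batchHandler) Cleanup(sarama.ConsumerGroupSession) error { return nil }

func (h *batchHandler) ConsumeClaim(sess sarama.ConsumerGroupSession, claim sarama.ConsumerGroupClaim) error {
	for msg := range claim.Messages() {
		// ... process msg durably before marking it ...
		sess.MarkMessage(msg, "")
		h.seen++
		if h.seen%h.batchSize == 0 {
			// Blocking, synchronous flush of the marked offsets to the
			// broker; needed because auto-commit is disabled.
			sess.Commit()
		}
	}
	return nil
}

func main() {
	c := sarama.NewConfig()
	c.Version = sarama.V2_5_0_0
	c.Consumer.Offsets.Initial = sarama.OffsetOldest
	c.Consumer.Offsets.AutoCommit.Enable = false // as in client.go above

	group, err := sarama.NewConsumerGroup([]string{"localhost:9092"}, "example-group", c)
	if err != nil {
		log.Fatal(err)
	}
	defer group.Close()

	for {
		handler := &batchHandler{batchSize: 100}
		if err := group.Consume(context.Background(), []string{"example-topic"}, handler); err != nil {
			log.Fatal(err)
		}
	}
}

Note that Commit() only flushes offsets previously recorded with MarkMessage/MarkOffset, so at-least-once semantics still depend on marking a message only after it has been durably processed.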
diff --git a/redshiftsink/vendor/github.com/Shopify/sarama/admin.go b/redshiftsink/vendor/github.com/Shopify/sarama/admin.go index 0430d9841..9dea0255f 100644 --- a/redshiftsink/vendor/github.com/Shopify/sarama/admin.go +++ b/redshiftsink/vendor/github.com/Shopify/sarama/admin.go @@ -802,7 +802,7 @@ func (ca *clusterAdmin) ListConsumerGroups() (allGroups map[string]string, err e // Query brokers in parallel, since we have to query *all* brokers brokers := ca.client.Brokers() groupMaps := make(chan map[string]string, len(brokers)) - errors := make(chan error, len(brokers)) + errChan := make(chan error, len(brokers)) wg := sync.WaitGroup{} for _, b := range brokers { @@ -813,7 +813,7 @@ func (ca *clusterAdmin) ListConsumerGroups() (allGroups map[string]string, err e response, err := b.ListGroups(&ListGroupsRequest{}) if err != nil { - errors <- err + errChan <- err return } @@ -828,7 +828,7 @@ func (ca *clusterAdmin) ListConsumerGroups() (allGroups map[string]string, err e wg.Wait() close(groupMaps) - close(errors) + close(errChan) for groupMap := range groupMaps { for group, protocolType := range groupMap { @@ -837,7 +837,7 @@ func (ca *clusterAdmin) ListConsumerGroups() (allGroups map[string]string, err e } // Intentionally return only the first error for simplicity - err = <-errors + err = <-errChan return } @@ -893,7 +893,7 @@ func (ca *clusterAdmin) DescribeLogDirs(brokerIds []int32) (allLogDirs map[int32 // Query brokers in parallel, since we may have to query multiple brokers logDirsMaps := make(chan map[int32][]DescribeLogDirsResponseDirMetadata, len(brokerIds)) - errors := make(chan error, len(brokerIds)) + errChan := make(chan error, len(brokerIds)) wg := sync.WaitGroup{} for _, b := range brokerIds { @@ -909,7 +909,7 @@ func (ca *clusterAdmin) DescribeLogDirs(brokerIds []int32) (allLogDirs map[int32 response, err := b.DescribeLogDirs(&DescribeLogDirsRequest{}) if err != nil { - errors <- err + errChan <- err return } logDirs := make(map[int32][]DescribeLogDirsResponseDirMetadata) @@ -920,7 +920,7 @@ func (ca *clusterAdmin) DescribeLogDirs(brokerIds []int32) (allLogDirs map[int32 wg.Wait() close(logDirsMaps) - close(errors) + close(errChan) for logDirsMap := range logDirsMaps { for id, logDirs := range logDirsMap { @@ -929,6 +929,6 @@ func (ca *clusterAdmin) DescribeLogDirs(brokerIds []int32) (allLogDirs map[int32 } // Intentionally return only the first error for simplicity - err = <-errors + err = <-errChan return } diff --git a/redshiftsink/vendor/github.com/Shopify/sarama/async_producer.go b/redshiftsink/vendor/github.com/Shopify/sarama/async_producer.go index d0ce01b66..f1ffc8f92 100644 --- a/redshiftsink/vendor/github.com/Shopify/sarama/async_producer.go +++ b/redshiftsink/vendor/github.com/Shopify/sarama/async_producer.go @@ -348,6 +348,10 @@ func (p *asyncProducer) dispatcher() { p.inFlight.Add(1) } + for _, interceptor := range p.conf.Producer.Interceptors { + msg.safelyApplyInterceptor(interceptor) + } + version := 1 if p.conf.Version.IsAtLeast(V0_11_0_0) { version = 2 diff --git a/redshiftsink/vendor/github.com/Shopify/sarama/balance_strategy.go b/redshiftsink/vendor/github.com/Shopify/sarama/balance_strategy.go index d9789a026..0ce7fea1f 100644 --- a/redshiftsink/vendor/github.com/Shopify/sarama/balance_strategy.go +++ b/redshiftsink/vendor/github.com/Shopify/sarama/balance_strategy.go @@ -373,8 +373,8 @@ func getBalanceScore(assignment map[string][]topicPartitionAssignment) int { } // Determine whether the current assignment plan is balanced. 
-func isBalanced(currentAssignment map[string][]topicPartitionAssignment, sortedCurrentSubscriptions []string, allSubscriptions map[string][]topicPartitionAssignment) bool { - sortedCurrentSubscriptions = sortMemberIDsByPartitionAssignments(currentAssignment) +func isBalanced(currentAssignment map[string][]topicPartitionAssignment, allSubscriptions map[string][]topicPartitionAssignment) bool { + sortedCurrentSubscriptions := sortMemberIDsByPartitionAssignments(currentAssignment) min := len(currentAssignment[sortedCurrentSubscriptions[0]]) max := len(currentAssignment[sortedCurrentSubscriptions[len(sortedCurrentSubscriptions)-1]]) if min >= max-1 { @@ -430,7 +430,7 @@ func (s *stickyBalanceStrategy) performReassignments(reassignablePartitions []to // reassign all reassignable partitions (starting from the partition with least potential consumers and if needed) // until the full list is processed or a balance is achieved for _, partition := range reassignablePartitions { - if isBalanced(currentAssignment, sortedCurrentSubscriptions, consumer2AllPotentialPartitions) { + if isBalanced(currentAssignment, consumer2AllPotentialPartitions) { break } @@ -690,11 +690,11 @@ func sortPartitionsByPotentialConsumerAssignments(partition2AllPotentialConsumer } func deepCopyAssignment(assignment map[string][]topicPartitionAssignment) map[string][]topicPartitionAssignment { - copy := make(map[string][]topicPartitionAssignment, len(assignment)) + m := make(map[string][]topicPartitionAssignment, len(assignment)) for memberID, subscriptions := range assignment { - copy[memberID] = append(subscriptions[:0:0], subscriptions...) + m[memberID] = append(subscriptions[:0:0], subscriptions...) } - return copy + return m } func areSubscriptionsIdentical(partition2AllPotentialConsumers map[topicPartitionAssignment][]string, consumer2AllPotentialPartitions map[string][]topicPartitionAssignment) bool { diff --git a/redshiftsink/vendor/github.com/Shopify/sarama/config.go b/redshiftsink/vendor/github.com/Shopify/sarama/config.go index 0ce308f80..9b7ce7aeb 100644 --- a/redshiftsink/vendor/github.com/Shopify/sarama/config.go +++ b/redshiftsink/vendor/github.com/Shopify/sarama/config.go @@ -229,6 +229,14 @@ type Config struct { // `Backoff` if set. BackoffFunc func(retries, maxRetries int) time.Duration } + + // Interceptors to be called when the producer dispatcher reads the + // message for the first time. Interceptors allows to intercept and + // possible mutate the message before they are published to Kafka + // cluster. *ProducerMessage modified by the first interceptor's + // OnSend() is passed to the second interceptor OnSend(), and so on in + // the interceptor chain. + Interceptors []ProducerInterceptor } // Consumer is the namespace for configuration related to consuming messages, @@ -391,6 +399,14 @@ type Config struct { // - use `ReadUncommitted` (default) to consume and return all messages in message channel // - use `ReadCommitted` to hide messages that are part of an aborted transaction IsolationLevel IsolationLevel + + // Interceptors to be called just before the record is sent to the + // messages channel. Interceptors allows to intercept and possible + // mutate the message before they are returned to the client. + // *ConsumerMessage modified by the first interceptor's OnConsume() is + // passed to the second interceptor OnConsume(), and so on in the + // interceptor chain. 
+ Interceptors []ConsumerInterceptor } // A user-provided string sent with every request to the brokers for logging, diff --git a/redshiftsink/vendor/github.com/Shopify/sarama/consumer.go b/redshiftsink/vendor/github.com/Shopify/sarama/consumer.go index e16d08aa9..f5f993a77 100644 --- a/redshiftsink/vendor/github.com/Shopify/sarama/consumer.go +++ b/redshiftsink/vendor/github.com/Shopify/sarama/consumer.go @@ -422,13 +422,13 @@ func (child *partitionConsumer) AsyncClose() { func (child *partitionConsumer) Close() error { child.AsyncClose() - var errors ConsumerErrors + var consumerErrors ConsumerErrors for err := range child.errors { - errors = append(errors, err) + consumerErrors = append(consumerErrors, err) } - if len(errors) > 0 { - return errors + if len(consumerErrors) > 0 { + return consumerErrors } return nil } @@ -451,6 +451,9 @@ feederLoop: } for i, msg := range msgs { + for _, interceptor := range child.conf.Consumer.Interceptors { + msg.safelyApplyInterceptor(interceptor) + } messageSelect: select { case <-child.dying: diff --git a/redshiftsink/vendor/github.com/Shopify/sarama/consumer_group.go b/redshiftsink/vendor/github.com/Shopify/sarama/consumer_group.go index 056b9e387..fcc5792ea 100644 --- a/redshiftsink/vendor/github.com/Shopify/sarama/consumer_group.go +++ b/redshiftsink/vendor/github.com/Shopify/sarama/consumer_group.go @@ -255,36 +255,36 @@ func (c *consumerGroup) newSession(ctx context.Context, topics []string, handler } // Sync consumer group - sync, err := c.syncGroupRequest(coordinator, plan, join.GenerationId) + groupRequest, err := c.syncGroupRequest(coordinator, plan, join.GenerationId) if err != nil { _ = coordinator.Close() return nil, err } - switch sync.Err { + switch groupRequest.Err { case ErrNoError: case ErrUnknownMemberId, ErrIllegalGeneration: // reset member ID and retry immediately c.memberID = "" return c.newSession(ctx, topics, handler, retries) case ErrNotCoordinatorForConsumer: // retry after backoff with coordinator refresh if retries <= 0 { - return nil, sync.Err + return nil, groupRequest.Err } return c.retryNewSession(ctx, topics, handler, retries, true) case ErrRebalanceInProgress: // retry after backoff if retries <= 0 { - return nil, sync.Err + return nil, groupRequest.Err } return c.retryNewSession(ctx, topics, handler, retries, false) default: - return nil, sync.Err + return nil, groupRequest.Err } // Retrieve and sort claims var claims map[string][]int32 - if len(sync.MemberAssignment) > 0 { - members, err := sync.GetMemberAssignment() + if len(groupRequest.MemberAssignment) > 0 { + members, err := groupRequest.GetMemberAssignment() if err != nil { return nil, err } @@ -513,6 +513,11 @@ type ConsumerGroupSession interface { // message twice, and your processing should ideally be idempotent. MarkOffset(topic string, partition int32, offset int64, metadata string) + // Commit the offset to the backend + // + // Note: calling Commit performs a blocking synchronous operation. + Commit() + // ResetOffset resets to the provided offset, alongside a metadata string that // represents the state of the partition consumer at that point in time. 
Reset // acts as a counterpart to MarkOffset, the difference being that it allows to @@ -624,6 +629,10 @@ func (s *consumerGroupSession) MarkOffset(topic string, partition int32, offset } } +func (s *consumerGroupSession) Commit() { + s.offsets.Commit() +} + func (s *consumerGroupSession) ResetOffset(topic string, partition int32, offset int64, metadata string) { if pom := s.offsets.findPOM(topic, partition); pom != nil { pom.ResetOffset(offset, metadata) diff --git a/redshiftsink/vendor/github.com/Shopify/sarama/dev.yml b/redshiftsink/vendor/github.com/Shopify/sarama/dev.yml index 57b2d3ca8..6468f1549 100644 --- a/redshiftsink/vendor/github.com/Shopify/sarama/dev.yml +++ b/redshiftsink/vendor/github.com/Shopify/sarama/dev.yml @@ -2,7 +2,7 @@ name: sarama up: - go: - version: '1.14' + version: '1.14.3' commands: test: diff --git a/redshiftsink/vendor/github.com/Shopify/sarama/docker-compose.yml b/redshiftsink/vendor/github.com/Shopify/sarama/docker-compose.yml new file mode 100644 index 000000000..25593fd3b --- /dev/null +++ b/redshiftsink/vendor/github.com/Shopify/sarama/docker-compose.yml @@ -0,0 +1,134 @@ +version: '3.7' +services: + zookeeper-1: + image: 'confluentinc/cp-zookeeper:${CONFLUENT_PLATFORM_VERSION:-5.5.0}' + restart: always + environment: + ZOOKEEPER_SERVER_ID: '1' + ZOOKEEPER_SERVERS: 'zookeeper-1:2888:3888;zookeeper-2:2888:3888;zookeeper-3:2888:3888' + ZOOKEEPER_CLIENT_PORT: '2181' + ZOOKEEPER_PEER_PORT: '2888' + ZOOKEEPER_LEADER_PORT: '3888' + ZOOKEEPER_INIT_LIMIT: '10' + ZOOKEEPER_SYNC_LIMIT: '5' + ZOOKEEPER_MAX_CLIENT_CONNS: '0' + zookeeper-2: + image: 'confluentinc/cp-zookeeper:${CONFLUENT_PLATFORM_VERSION:-5.5.0}' + restart: always + environment: + ZOOKEEPER_SERVER_ID: '2' + ZOOKEEPER_SERVERS: 'zookeeper-1:2888:3888;zookeeper-2:2888:3888;zookeeper-3:2888:3888' + ZOOKEEPER_CLIENT_PORT: '2181' + ZOOKEEPER_PEER_PORT: '2888' + ZOOKEEPER_LEADER_PORT: '3888' + ZOOKEEPER_INIT_LIMIT: '10' + ZOOKEEPER_SYNC_LIMIT: '5' + ZOOKEEPER_MAX_CLIENT_CONNS: '0' + zookeeper-3: + image: 'confluentinc/cp-zookeeper:${CONFLUENT_PLATFORM_VERSION:-5.5.0}' + restart: always + environment: + ZOOKEEPER_SERVER_ID: '3' + ZOOKEEPER_SERVERS: 'zookeeper-1:2888:3888;zookeeper-2:2888:3888;zookeeper-3:2888:3888' + ZOOKEEPER_CLIENT_PORT: '2181' + ZOOKEEPER_PEER_PORT: '2888' + ZOOKEEPER_LEADER_PORT: '3888' + ZOOKEEPER_INIT_LIMIT: '10' + ZOOKEEPER_SYNC_LIMIT: '5' + ZOOKEEPER_MAX_CLIENT_CONNS: '0' + kafka-1: + image: 'confluentinc/cp-kafka:${CONFLUENT_PLATFORM_VERSION:-5.5.0}' + restart: always + environment: + KAFKA_ZOOKEEPER_CONNECT: 'zookeeper-1:2181,zookeeper-2:2181,zookeeper-3:2181' + KAFKA_LISTENERS: 'LISTENER_INTERNAL://:9091,LISTENER_LOCAL://:29091' + KAFKA_ADVERTISED_LISTENERS: 'LISTENER_INTERNAL://kafka-1:9091,LISTENER_LOCAL://localhost:29091' + KAFKA_INTER_BROKER_LISTENER_NAME: 'LISTENER_INTERNAL' + KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: 'LISTENER_INTERNAL:PLAINTEXT,LISTENER_LOCAL:PLAINTEXT' + KAFKA_DEFAULT_REPLICATION_FACTOR: '2' + KAFKA_BROKER_ID: '1' + KAFKA_BROKER_RACK: '1' + KAFKA_ZOOKEEPER_SESSION_TIMEOUT_MS: '3000' + KAFKA_ZOOKEEPER_CONNECTION_TIMEOUT_MS: '3000' + KAFKA_REPLICA_SELECTOR_CLASS: 'org.apache.kafka.common.replica.RackAwareReplicaSelector' + KAFKA_DELETE_TOPIC_ENABLE: 'true' + KAFKA_AUTO_CREATE_TOPICS_ENABLE: 'false' + kafka-2: + image: 'confluentinc/cp-kafka:${CONFLUENT_PLATFORM_VERSION:-5.5.0}' + restart: always + environment: + KAFKA_ZOOKEEPER_CONNECT: 'zookeeper-1:2181,zookeeper-2:2181,zookeeper-3:2181' + KAFKA_LISTENERS: 'LISTENER_INTERNAL://:9091,LISTENER_LOCAL://:29092' + 
KAFKA_ADVERTISED_LISTENERS: 'LISTENER_INTERNAL://kafka-2:9091,LISTENER_LOCAL://localhost:29092' + KAFKA_INTER_BROKER_LISTENER_NAME: 'LISTENER_INTERNAL' + KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: 'LISTENER_INTERNAL:PLAINTEXT,LISTENER_LOCAL:PLAINTEXT' + KAFKA_DEFAULT_REPLICATION_FACTOR: '2' + KAFKA_BROKER_ID: '2' + KAFKA_BROKER_RACK: '2' + KAFKA_ZOOKEEPER_SESSION_TIMEOUT_MS: '3000' + KAFKA_ZOOKEEPER_CONNECTION_TIMEOUT_MS: '3000' + KAFKA_REPLICA_SELECTOR_CLASS: 'org.apache.kafka.common.replica.RackAwareReplicaSelector' + KAFKA_DELETE_TOPIC_ENABLE: 'true' + KAFKA_AUTO_CREATE_TOPICS_ENABLE: 'false' + kafka-3: + image: 'confluentinc/cp-kafka:${CONFLUENT_PLATFORM_VERSION:-5.5.0}' + restart: always + environment: + KAFKA_ZOOKEEPER_CONNECT: 'zookeeper-1:2181,zookeeper-2:2181,zookeeper-3:2181' + KAFKA_LISTENERS: 'LISTENER_INTERNAL://:9091,LISTENER_LOCAL://:29093' + KAFKA_ADVERTISED_LISTENERS: 'LISTENER_INTERNAL://kafka-3:9091,LISTENER_LOCAL://localhost:29093' + KAFKA_INTER_BROKER_LISTENER_NAME: 'LISTENER_INTERNAL' + KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: 'LISTENER_INTERNAL:PLAINTEXT,LISTENER_LOCAL:PLAINTEXT' + KAFKA_DEFAULT_REPLICATION_FACTOR: '2' + KAFKA_BROKER_ID: '3' + KAFKA_BROKER_RACK: '3' + KAFKA_ZOOKEEPER_SESSION_TIMEOUT_MS: '3000' + KAFKA_ZOOKEEPER_CONNECTION_TIMEOUT_MS: '3000' + KAFKA_REPLICA_SELECTOR_CLASS: 'org.apache.kafka.common.replica.RackAwareReplicaSelector' + KAFKA_DELETE_TOPIC_ENABLE: 'true' + KAFKA_AUTO_CREATE_TOPICS_ENABLE: 'false' + kafka-4: + image: 'confluentinc/cp-kafka:${CONFLUENT_PLATFORM_VERSION:-5.5.0}' + restart: always + environment: + KAFKA_ZOOKEEPER_CONNECT: 'zookeeper-1:2181,zookeeper-2:2181,zookeeper-3:2181' + KAFKA_LISTENERS: 'LISTENER_INTERNAL://:9091,LISTENER_LOCAL://:29094' + KAFKA_ADVERTISED_LISTENERS: 'LISTENER_INTERNAL://kafka-4:9091,LISTENER_LOCAL://localhost:29094' + KAFKA_INTER_BROKER_LISTENER_NAME: 'LISTENER_INTERNAL' + KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: 'LISTENER_INTERNAL:PLAINTEXT,LISTENER_LOCAL:PLAINTEXT' + KAFKA_DEFAULT_REPLICATION_FACTOR: '2' + KAFKA_BROKER_ID: '4' + KAFKA_BROKER_RACK: '4' + KAFKA_ZOOKEEPER_SESSION_TIMEOUT_MS: '3000' + KAFKA_ZOOKEEPER_CONNECTION_TIMEOUT_MS: '3000' + KAFKA_REPLICA_SELECTOR_CLASS: 'org.apache.kafka.common.replica.RackAwareReplicaSelector' + KAFKA_DELETE_TOPIC_ENABLE: 'true' + KAFKA_AUTO_CREATE_TOPICS_ENABLE: 'false' + kafka-5: + image: 'confluentinc/cp-kafka:${CONFLUENT_PLATFORM_VERSION:-5.5.0}' + restart: always + environment: + KAFKA_ZOOKEEPER_CONNECT: 'zookeeper-1:2181,zookeeper-2:2181,zookeeper-3:2181' + KAFKA_LISTENERS: 'LISTENER_INTERNAL://:9091,LISTENER_LOCAL://:29095' + KAFKA_ADVERTISED_LISTENERS: 'LISTENER_INTERNAL://kafka-5:9091,LISTENER_LOCAL://localhost:29095' + KAFKA_INTER_BROKER_LISTENER_NAME: 'LISTENER_INTERNAL' + KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: 'LISTENER_INTERNAL:PLAINTEXT,LISTENER_LOCAL:PLAINTEXT' + KAFKA_DEFAULT_REPLICATION_FACTOR: '2' + KAFKA_BROKER_ID: '5' + KAFKA_BROKER_RACK: '5' + KAFKA_ZOOKEEPER_SESSION_TIMEOUT_MS: '3000' + KAFKA_ZOOKEEPER_CONNECTION_TIMEOUT_MS: '3000' + KAFKA_REPLICA_SELECTOR_CLASS: 'org.apache.kafka.common.replica.RackAwareReplicaSelector' + KAFKA_DELETE_TOPIC_ENABLE: 'true' + KAFKA_AUTO_CREATE_TOPICS_ENABLE: 'false' + toxiproxy: + image: 'shopify/toxiproxy:2.1.4' + ports: + # The tests themselves actually start the proxies on these ports + - '29091:29091' + - '29092:29092' + - '29093:29093' + - '29094:29094' + - '29095:29095' + # This is the toxiproxy API port + - '8474:8474' diff --git a/redshiftsink/vendor/github.com/Shopify/sarama/go.mod
b/redshiftsink/vendor/github.com/Shopify/sarama/go.mod index 1dca1cc33..d775035ce 100644 --- a/redshiftsink/vendor/github.com/Shopify/sarama/go.mod +++ b/redshiftsink/vendor/github.com/Shopify/sarama/go.mod @@ -9,26 +9,27 @@ require ( github.com/eapache/go-xerial-snappy v0.0.0-20180814174437-776d5712da21 github.com/eapache/queue v1.1.0 github.com/fortytw2/leaktest v1.3.0 - github.com/frankban/quicktest v1.7.2 // indirect + github.com/frankban/quicktest v1.10.0 // indirect github.com/golang/snappy v0.0.1 // indirect - github.com/google/go-cmp v0.4.0 // indirect + github.com/google/go-cmp v0.4.1 // indirect github.com/hashicorp/go-uuid v1.0.2 // indirect - github.com/jcmturner/gofork v1.0.0 // indirect - github.com/klauspost/compress v1.9.8 - github.com/kr/pretty v0.2.0 // indirect - github.com/pierrec/lz4 v2.4.1+incompatible - github.com/rcrowley/go-metrics v0.0.0-20190826022208-cac0b30c2563 - github.com/stretchr/testify v1.4.0 + github.com/jcmturner/gofork v1.0.0 + github.com/klauspost/compress v1.10.10 + github.com/kr/text v0.2.0 // indirect + github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e // indirect + github.com/pierrec/lz4 v2.5.2+incompatible + github.com/rcrowley/go-metrics v0.0.0-20200313005456-10cdbea86bc0 + github.com/stretchr/testify v1.6.0 github.com/xdg/scram v0.0.0-20180814205039-7eeb5667e42c github.com/xdg/stringprep v1.0.0 // indirect - golang.org/x/crypto v0.0.0-20200204104054-c9f3fb736b72 // indirect - golang.org/x/net v0.0.0-20200202094626-16171245cfb2 + golang.org/x/crypto v0.0.0-20200510223506-06a226fb4e37 // indirect + golang.org/x/net v0.0.0-20200528225125-3c3fba18258b golang.org/x/text v0.3.2 // indirect - gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 // indirect + gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f // indirect gopkg.in/jcmturner/aescts.v1 v1.0.1 // indirect gopkg.in/jcmturner/dnsutils.v1 v1.0.1 // indirect gopkg.in/jcmturner/goidentity.v3 v3.0.0 // indirect gopkg.in/jcmturner/gokrb5.v7 v7.5.0 gopkg.in/jcmturner/rpc.v1 v1.1.0 // indirect - gopkg.in/yaml.v2 v2.2.8 // indirect + gopkg.in/yaml.v3 v3.0.0-20200601152816-913338de1bd2 // indirect ) diff --git a/redshiftsink/vendor/github.com/Shopify/sarama/go.sum b/redshiftsink/vendor/github.com/Shopify/sarama/go.sum index 06ec3280c..ad0889d49 100644 --- a/redshiftsink/vendor/github.com/Shopify/sarama/go.sum +++ b/redshiftsink/vendor/github.com/Shopify/sarama/go.sum @@ -1,5 +1,6 @@ github.com/Shopify/toxiproxy v2.1.4+incompatible h1:TKdv8HiTLgE5wdJuEML90aBgNWsokNbMijUGhmcoBJc= github.com/Shopify/toxiproxy v2.1.4+incompatible/go.mod h1:OXgGpZ6Cli1/URJOF1DMxUHB2q5Ap20/P/eIdh4G0pI= +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -11,49 +12,55 @@ github.com/eapache/queue v1.1.0 h1:YOEu7KNc61ntiQlcEeUIoDTJ2o8mQznoNvUhiigpIqc= github.com/eapache/queue v1.1.0/go.mod h1:6eCeP0CKFpHLu8blIFXhExK/dRa7WDZfr6jVFPTqq+I= github.com/fortytw2/leaktest v1.3.0 h1:u8491cBMTQ8ft8aeV+adlcytMZylmA5nnwwkRZjI8vw= github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g= -github.com/frankban/quicktest v1.7.2 h1:2QxQoC1TS09S7fhCPsrvqYdvP1H5M1P1ih5ABm3BTYk= -github.com/frankban/quicktest v1.7.2/go.mod h1:jaStnuzAqU1AJdCO0l53JDCJrVDKcS03DbaAcR7Ks/o= +github.com/frankban/quicktest v1.10.0 
h1:Gfh+GAJZOAoKZsIZeZbdn2JF10kN1XHNvjsvQK8gVkE= +github.com/frankban/quicktest v1.10.0/go.mod h1:ui7WezCLWMWxVWr1GETZY3smRy0G4KWq9vcPtJmFl7Y= github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4= github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= -github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.4.0 h1:xsAVV57WRhGj6kEIi8ReJzQlHHqcBYCElAvkovg3B/4= github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.4.1 h1:/exdXoGamhu5ONeUJH0deniYLWYvQwW66yvlfiiKTu0= +github.com/google/go-cmp v0.4.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/hashicorp/go-uuid v1.0.2 h1:cfejS+Tpcp13yd5nYHWDI6qVCny6wyX2Mt5SGur2IGE= github.com/hashicorp/go-uuid v1.0.2/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= github.com/jcmturner/gofork v1.0.0 h1:J7uCkflzTEhUZ64xqKnkDxq3kzc96ajM1Gli5ktUem8= github.com/jcmturner/gofork v1.0.0/go.mod h1:MK8+TM0La+2rjBD4jE12Kj1pCCxK7d2LK/UM3ncEo0o= -github.com/klauspost/compress v1.9.8 h1:VMAMUUOh+gaxKTMk+zqbjsSjsIcUcL/LF4o63i82QyA= -github.com/klauspost/compress v1.9.8/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= -github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= -github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/klauspost/compress v1.10.7 h1:7rix8v8GpI3ZBb0nSozFRgbtXKv+hOe+qfEpZqybrAg= +github.com/klauspost/compress v1.10.7/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= +github.com/klauspost/compress v1.10.10 h1:a/y8CglcM7gLGYmlbP/stPE5sR3hbhFRUjCBfd/0B3I= +github.com/klauspost/compress v1.10.10/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= github.com/kr/pretty v0.2.0 h1:s5hAObm+yFO5uHYt5dYjxi2rXrsnmRpJx4OYvIWUaQs= github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= -github.com/pierrec/lz4 v2.4.1+incompatible h1:mFe7ttWaflA46Mhqh+jUfjp2qTbPYxLB2/OyBppH9dg= -github.com/pierrec/lz4 v2.4.1+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs= +github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= +github.com/pierrec/lz4 v2.5.2+incompatible h1:WCjObylUIOlKy/+7Abdn34TLIkXiA4UWUMhxq9m9ZXI= +github.com/pierrec/lz4 v2.5.2+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/rcrowley/go-metrics v0.0.0-20190826022208-cac0b30c2563 h1:dY6ETXrvDG7Sa4vE8ZQG4yqWg6UnOcbqTAahkV813vQ= -github.com/rcrowley/go-metrics v0.0.0-20190826022208-cac0b30c2563/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4= +github.com/rcrowley/go-metrics v0.0.0-20200313005456-10cdbea86bc0 h1:MkV+77GLUNo5oJ0jf870itWm3D0Sjh7+Za9gazKc5LQ= +github.com/rcrowley/go-metrics v0.0.0-20200313005456-10cdbea86bc0/go.mod 
h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk= -github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +github.com/stretchr/testify v1.6.0 h1:jlIyCplCJFULU/01vCkhKuTyc3OorI3bJFuw6obfgho= +github.com/stretchr/testify v1.6.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/xdg/scram v0.0.0-20180814205039-7eeb5667e42c h1:u40Z8hqBAAQyv+vATcGgV0YCnDjqSL7/q/JyPhhJSPk= github.com/xdg/scram v0.0.0-20180814205039-7eeb5667e42c/go.mod h1:lB8K/P019DLNhemzwFU4jHLhdvlE6uDZjXFejJXr49I= github.com/xdg/stringprep v1.0.0 h1:d9X0esnoa3dFsV0FG35rAT0RIhYFlPq7MiP+DW89La0= github.com/xdg/stringprep v1.0.0/go.mod h1:Jhud4/sHMO4oL310DaZAKk9ZaJ08SJfe+sJh0HrGL1Y= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2 h1:VklqNMn3ovrHsnt90PveolxSbWFaJdECFbxSq0Mqo2M= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.0.0-20200204104054-c9f3fb736b72 h1:+ELyKg6m8UBf0nPFSqD0mi7zUfwPyXo23HNjMnXPz7w= -golang.org/x/crypto v0.0.0-20200204104054-c9f3fb736b72/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20200510223506-06a226fb4e37 h1:cg5LA/zNPRzIXIWSCxQW10Rvpy94aQh3LT/ShoCpkHw= +golang.org/x/crypto v0.0.0-20200510223506-06a226fb4e37/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3 h1:0GoQqolDA55aaLxZyTzK/Y2ePZzZTUrRacwib7cNsYQ= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20200202094626-16171245cfb2 h1:CCH4IOTTfewWjGOlSp+zGcjutRKlBEZQ6wTn8ozI/nI= -golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200528225125-3c3fba18258b h1:IYiJPiJfzktmDAO1HQiwjMjwjlYKHAL7KzeD544RJPs= +golang.org/x/net v0.0.0-20200528225125-3c3fba18258b/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs= @@ -63,8 +70,8 @@ golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IV golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo= -gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f h1:BLraFXnmrev5lT+xlilqcH8XK9/i0At2xKjWk4p6zsU= +gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/jcmturner/aescts.v1 
v1.0.1 h1:cVVZBK2b1zY26haWB4vbBiZrfFQnfbTVrE3xZq6hrEw= gopkg.in/jcmturner/aescts.v1 v1.0.1/go.mod h1:nsR8qBOg+OucoIW+WMhB3GspUQXq9XorLnQb9XtvcOo= gopkg.in/jcmturner/dnsutils.v1 v1.0.1 h1:cIuC1OLRGZrld+16ZJvvZxVJeKPsvd5eUIvxfoN5hSM= @@ -75,7 +82,7 @@ gopkg.in/jcmturner/gokrb5.v7 v7.5.0 h1:a9tsXlIDD9SKxotJMK3niV7rPZAJeX2aD/0yg3qlI gopkg.in/jcmturner/gokrb5.v7 v7.5.0/go.mod h1:l8VISx+WGYp+Fp7KRbsiUuXTTOnxIc3Tuvyavf11/WM= gopkg.in/jcmturner/rpc.v1 v1.1.0 h1:QHIUxTX1ISuAv9dD2wJ9HWQVuWDX/Zc0PfeC2tjc4rU= gopkg.in/jcmturner/rpc.v1 v1.1.0/go.mod h1:YIdkC4XfD6GXbzje11McwsDuOlZQSb9W4vfLvuNnlv8= -gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw= -gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10= -gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.0-20200601152816-913338de1bd2 h1:VEmvx0P+GVTgkNu2EdTN988YCZPcD3lo9AoczZpucwc= +gopkg.in/yaml.v3 v3.0.0-20200601152816-913338de1bd2/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/redshiftsink/vendor/github.com/Shopify/sarama/gssapi_kerberos.go b/redshiftsink/vendor/github.com/Shopify/sarama/gssapi_kerberos.go index 98be0f508..1993fc08f 100644 --- a/redshiftsink/vendor/github.com/Shopify/sarama/gssapi_kerberos.go +++ b/redshiftsink/vendor/github.com/Shopify/sarama/gssapi_kerberos.go @@ -1,13 +1,13 @@ package sarama import ( - "encoding/asn1" "encoding/binary" "fmt" "io" "strings" "time" + "github.com/jcmturner/gofork/encoding/asn1" "gopkg.in/jcmturner/gokrb5.v7/asn1tools" "gopkg.in/jcmturner/gokrb5.v7/gssapi" "gopkg.in/jcmturner/gokrb5.v7/iana/chksumtype" @@ -34,6 +34,7 @@ type GSSAPIConfig struct { Username string Password string Realm string + DisablePAFXFAST bool } type GSSAPIKerberosAuth struct { diff --git a/redshiftsink/vendor/github.com/Shopify/sarama/interceptors.go b/redshiftsink/vendor/github.com/Shopify/sarama/interceptors.go new file mode 100644 index 000000000..d0d33e526 --- /dev/null +++ b/redshiftsink/vendor/github.com/Shopify/sarama/interceptors.go @@ -0,0 +1,43 @@ +package sarama + +// ProducerInterceptor allows you to intercept (and possibly mutate) the records +// received by the producer before they are published to the Kafka cluster. +// https://cwiki.apache.org/confluence/display/KAFKA/KIP-42%3A+Add+Producer+and+Consumer+Interceptors#KIP42:AddProducerandConsumerInterceptors-Motivation +type ProducerInterceptor interface { + + // OnSend is called when the producer message is intercepted. Please avoid + // modifying the message until it's safe to do so, as this is _not_ a copy + // of the message. + OnSend(*ProducerMessage) +} + +// ConsumerInterceptor allows you to intercept (and possibly mutate) the records +// received by the consumer before they are sent to the messages channel. +// https://cwiki.apache.org/confluence/display/KAFKA/KIP-42%3A+Add+Producer+and+Consumer+Interceptors#KIP42:AddProducerandConsumerInterceptors-Motivation +type ConsumerInterceptor interface { + + // OnConsume is called when the consumed message is intercepted. Please + // avoid modifying the message until it's safe to do so, as this is _not_ a + // copy of the message. 
+ OnConsume(*ConsumerMessage) +} + +func (msg *ProducerMessage) safelyApplyInterceptor(interceptor ProducerInterceptor) { + defer func() { + if r := recover(); r != nil { + Logger.Printf("Error when calling producer interceptor: %s, %w\n", interceptor, r) + } + }() + + interceptor.OnSend(msg) +} + +func (msg *ConsumerMessage) safelyApplyInterceptor(interceptor ConsumerInterceptor) { + defer func() { + if r := recover(); r != nil { + Logger.Printf("Error when calling consumer interceptor: %s, %w\n", interceptor, r) + } + }() + + interceptor.OnConsume(msg) +} diff --git a/redshiftsink/vendor/github.com/Shopify/sarama/kerberos_client.go b/redshiftsink/vendor/github.com/Shopify/sarama/kerberos_client.go index 91b998f5d..ebc114179 100644 --- a/redshiftsink/vendor/github.com/Shopify/sarama/kerberos_client.go +++ b/redshiftsink/vendor/github.com/Shopify/sarama/kerberos_client.go @@ -19,14 +19,9 @@ func (c *KerberosGoKrb5Client) CName() types.PrincipalName { return c.Credentials.CName() } -/* -* -* Create kerberos client used to obtain TGT and TGS tokens -* used gokrb5 library, which is a pure go kerberos client with -* some GSS-API capabilities, and SPNEGO support. Kafka does not use SPNEGO -* it uses pure Kerberos 5 solution (RFC-4121 and RFC-4120). -* - */ +// NewKerberosClient creates kerberos client used to obtain TGT and TGS tokens. +// It uses pure go Kerberos 5 solution (RFC-4121 and RFC-4120). +// uses gokrb5 library underlying which is a pure go kerberos client with some GSS-API capabilities. func NewKerberosClient(config *GSSAPIConfig) (KerberosClient, error) { cfg, err := krb5config.Load(config.KerberosConfigPath) if err != nil { @@ -42,10 +37,10 @@ func createClient(config *GSSAPIConfig, cfg *krb5config.Config) (KerberosClient, if err != nil { return nil, err } - client = krb5client.NewClientWithKeytab(config.Username, config.Realm, kt, cfg) + client = krb5client.NewClientWithKeytab(config.Username, config.Realm, kt, cfg, krb5client.DisablePAFXFAST(config.DisablePAFXFAST)) } else { client = krb5client.NewClientWithPassword(config.Username, - config.Realm, config.Password, cfg) + config.Realm, config.Password, cfg, krb5client.DisablePAFXFAST(config.DisablePAFXFAST)) } return &KerberosGoKrb5Client{*client}, nil } diff --git a/redshiftsink/vendor/github.com/Shopify/sarama/offset_manager.go b/redshiftsink/vendor/github.com/Shopify/sarama/offset_manager.go index 19408729f..b4fea8226 100644 --- a/redshiftsink/vendor/github.com/Shopify/sarama/offset_manager.go +++ b/redshiftsink/vendor/github.com/Shopify/sarama/offset_manager.go @@ -19,6 +19,10 @@ type OffsetManager interface { // will otherwise leak memory. You must call this after all the // PartitionOffsetManagers are closed. Close() error + + // Commit commits the offsets. This method can be used if AutoCommit.Enable is + // set to false. 
+ Commit() } type offsetManager struct { @@ -58,7 +62,6 @@ func newOffsetManagerFromClient(group, memberID string, generation int32, client client: client, conf: conf, group: group, - ticker: time.NewTicker(conf.Consumer.Offsets.AutoCommit.Interval), poms: make(map[string]map[int32]*partitionOffsetManager), memberID: memberID, @@ -67,7 +70,10 @@ func newOffsetManagerFromClient(group, memberID string, generation int32, client closing: make(chan none), closed: make(chan none), } - go withRecover(om.mainLoop) + if conf.Consumer.Offsets.AutoCommit.Enable { + om.ticker = time.NewTicker(conf.Consumer.Offsets.AutoCommit.Interval) + go withRecover(om.mainLoop) + } return om, nil } @@ -99,7 +105,9 @@ func (om *offsetManager) Close() error { om.closeOnce.Do(func() { // exit the mainLoop close(om.closing) - <-om.closed + if om.conf.Consumer.Offsets.AutoCommit.Enable { + <-om.closed + } // mark all POMs as closed om.asyncClosePOMs() @@ -225,20 +233,19 @@ func (om *offsetManager) mainLoop() { for { select { case <-om.ticker.C: - om.flushToBroker() - om.releasePOMs(false) + om.Commit() case <-om.closing: return } } } -// flushToBroker is ignored if auto-commit offsets is disabled -func (om *offsetManager) flushToBroker() { - if !om.conf.Consumer.Offsets.AutoCommit.Enable { - return - } +func (om *offsetManager) Commit() { + om.flushToBroker() + om.releasePOMs(false) +} +func (om *offsetManager) flushToBroker() { req := om.constructRequest() if req == nil { return diff --git a/redshiftsink/vendor/github.com/Shopify/sarama/utils.go b/redshiftsink/vendor/github.com/Shopify/sarama/utils.go index d138a5eb3..93bdeefef 100644 --- a/redshiftsink/vendor/github.com/Shopify/sarama/utils.go +++ b/redshiftsink/vendor/github.com/Shopify/sarama/utils.go @@ -161,6 +161,7 @@ var ( V2_3_0_0 = newKafkaVersion(2, 3, 0, 0) V2_4_0_0 = newKafkaVersion(2, 4, 0, 0) V2_5_0_0 = newKafkaVersion(2, 5, 0, 0) + V2_6_0_0 = newKafkaVersion(2, 6, 0, 0) SupportedVersions = []KafkaVersion{ V0_8_2_0, @@ -187,9 +188,10 @@ var ( V2_3_0_0, V2_4_0_0, V2_5_0_0, + V2_6_0_0, } MinVersion = V0_8_2_0 - MaxVersion = V2_5_0_0 + MaxVersion = V2_6_0_0 ) //ParseKafkaVersion parses and returns kafka version or error from a string diff --git a/redshiftsink/vendor/github.com/klauspost/compress/fse/bitreader.go b/redshiftsink/vendor/github.com/klauspost/compress/fse/bitreader.go index b9db204f5..f65eb3909 100644 --- a/redshiftsink/vendor/github.com/klauspost/compress/fse/bitreader.go +++ b/redshiftsink/vendor/github.com/klauspost/compress/fse/bitreader.go @@ -6,6 +6,7 @@ package fse import ( + "encoding/binary" "errors" "io" ) @@ -34,8 +35,12 @@ func (b *bitReader) init(in []byte) error { } b.bitsRead = 64 b.value = 0 - b.fill() - b.fill() + if len(in) >= 8 { + b.fillFastStart() + } else { + b.fill() + b.fill() + } b.bitsRead += 8 - uint8(highBits(uint32(v))) return nil } @@ -63,8 +68,9 @@ func (b *bitReader) fillFast() { if b.bitsRead < 32 { return } - // Do single re-slice to avoid bounds checks. - v := b.in[b.off-4 : b.off] + // 2 bounds checks. 
+ v := b.in[b.off-4:] + v = v[:4] low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) b.value = (b.value << 32) | uint64(low) b.bitsRead -= 32 @@ -77,7 +83,8 @@ func (b *bitReader) fill() { return } if b.off > 4 { - v := b.in[b.off-4 : b.off] + v := b.in[b.off-4:] + v = v[:4] low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) b.value = (b.value << 32) | uint64(low) b.bitsRead -= 32 @@ -91,9 +98,17 @@ func (b *bitReader) fill() { } } +// fillFastStart() assumes the bitreader is empty and there is at least 8 bytes to read. +func (b *bitReader) fillFastStart() { + // Do single re-slice to avoid bounds checks. + b.value = binary.LittleEndian.Uint64(b.in[b.off-8:]) + b.bitsRead = 0 + b.off -= 8 +} + // finished returns true if all bits have been read from the bit stream. func (b *bitReader) finished() bool { - return b.off == 0 && b.bitsRead >= 64 + return b.bitsRead >= 64 && b.off == 0 } // close the bitstream and returns an error if out-of-buffer reads occurred. diff --git a/redshiftsink/vendor/github.com/klauspost/compress/fse/bytereader.go b/redshiftsink/vendor/github.com/klauspost/compress/fse/bytereader.go index f228a46cd..abade2d60 100644 --- a/redshiftsink/vendor/github.com/klauspost/compress/fse/bytereader.go +++ b/redshiftsink/vendor/github.com/klauspost/compress/fse/bytereader.go @@ -25,19 +25,10 @@ func (b *byteReader) advance(n uint) { b.off += int(n) } -// Int32 returns a little endian int32 starting at current offset. -func (b byteReader) Int32() int32 { - b2 := b.b[b.off : b.off+4 : b.off+4] - v3 := int32(b2[3]) - v2 := int32(b2[2]) - v1 := int32(b2[1]) - v0 := int32(b2[0]) - return v0 | (v1 << 8) | (v2 << 16) | (v3 << 24) -} - // Uint32 returns a little endian uint32 starting at current offset. func (b byteReader) Uint32() uint32 { - b2 := b.b[b.off : b.off+4 : b.off+4] + b2 := b.b[b.off:] + b2 = b2[:4] v3 := uint32(b2[3]) v2 := uint32(b2[2]) v1 := uint32(b2[1]) diff --git a/redshiftsink/vendor/github.com/klauspost/compress/fse/fse.go b/redshiftsink/vendor/github.com/klauspost/compress/fse/fse.go index 075357b5b..535cbadfd 100644 --- a/redshiftsink/vendor/github.com/klauspost/compress/fse/fse.go +++ b/redshiftsink/vendor/github.com/klauspost/compress/fse/fse.go @@ -44,18 +44,14 @@ var ( // Scratch provides temporary storage for compression and decompression. type Scratch struct { // Private - count [maxSymbolValue + 1]uint32 - norm [maxSymbolValue + 1]int16 - symbolLen uint16 // Length of active part of the symbol table. - actualTableLog uint8 // Selected tablelog. - br byteReader - bits bitReader - bw bitWriter - ct cTable // Compression tables. - decTable []decSymbol // Decompression table. - zeroBits bool // no bits has prob > 50%. - clearCount bool // clear count - maxCount int // count of the most probable symbol + count [maxSymbolValue + 1]uint32 + norm [maxSymbolValue + 1]int16 + br byteReader + bits bitReader + bw bitWriter + ct cTable // Compression tables. + decTable []decSymbol // Decompression table. + maxCount int // count of the most probable symbol // Per block parameters. // These can be used to override compression parameters of the block. @@ -68,17 +64,22 @@ type Scratch struct { // and allocation will be avoided. Out []byte - // MaxSymbolValue will override the maximum symbol value of the next block. - MaxSymbolValue uint8 - - // TableLog will attempt to override the tablelog for the next block. - TableLog uint8 - // DecompressLimit limits the maximum decoded size acceptable. 
// If > 0 decompression will stop when approximately this many bytes // has been decoded. // If 0, maximum size will be 2GB. DecompressLimit int + + symbolLen uint16 // Length of active part of the symbol table. + actualTableLog uint8 // Selected tablelog. + zeroBits bool // no bits has prob > 50%. + clearCount bool // clear count + + // MaxSymbolValue will override the maximum symbol value of the next block. + MaxSymbolValue uint8 + + // TableLog will attempt to override the tablelog for the next block. + TableLog uint8 } // Histogram allows to populate the histogram and skip that step in the compression, diff --git a/redshiftsink/vendor/github.com/klauspost/compress/huff0/README.md b/redshiftsink/vendor/github.com/klauspost/compress/huff0/README.md index 0a8448ce9..e12da4db2 100644 --- a/redshiftsink/vendor/github.com/klauspost/compress/huff0/README.md +++ b/redshiftsink/vendor/github.com/klauspost/compress/huff0/README.md @@ -12,8 +12,6 @@ but it can be used as a secondary step to compressors (like Snappy) that does no * [Godoc documentation](https://godoc.org/github.com/klauspost/compress/huff0) -THIS PACKAGE IS NOT CONSIDERED STABLE AND API OR ENCODING MAY CHANGE IN THE FUTURE. - ## News * Mar 2018: First implementation released. Consider this beta software for now. @@ -75,6 +73,8 @@ which can be given to the decompressor. Decompressing is done by calling the [`Decompress1X`](https://godoc.org/github.com/klauspost/compress/huff0#Scratch.Decompress1X) or [`Decompress4X`](https://godoc.org/github.com/klauspost/compress/huff0#Scratch.Decompress4X) function. +For concurrently decompressing content with a fixed table a stateless [`Decoder`](https://godoc.org/github.com/klauspost/compress/huff0#Decoder) can be requested which will remain correct as long as the scratch is unchanged. The capacity of the provided slice indicates the expected output size. + You must provide the output from the compression stage, at exactly the size you got back. If you receive an error back your input was likely corrupted. @@ -84,4 +84,4 @@ There are no integrity checks, so relying on errors from the decompressor does n # Contributing Contributions are always welcome. Be aware that adding public functions will require good justification and breaking -changes will likely not be accepted. If in doubt open an issue before writing the PR. \ No newline at end of file +changes will likely not be accepted. If in doubt open an issue before writing the PR. diff --git a/redshiftsink/vendor/github.com/klauspost/compress/huff0/bitreader.go b/redshiftsink/vendor/github.com/klauspost/compress/huff0/bitreader.go index 7d0903c70..a4979e886 100644 --- a/redshiftsink/vendor/github.com/klauspost/compress/huff0/bitreader.go +++ b/redshiftsink/vendor/github.com/klauspost/compress/huff0/bitreader.go @@ -6,6 +6,7 @@ package huff0 import ( + "encoding/binary" "errors" "io" ) @@ -34,29 +35,16 @@ func (b *bitReader) init(in []byte) error { } b.bitsRead = 64 b.value = 0 - b.fill() - b.fill() + if len(in) >= 8 { + b.fillFastStart() + } else { + b.fill() + b.fill() + } b.bitsRead += 8 - uint8(highBit32(uint32(v))) return nil } -// getBits will return n bits. n can be 0. -func (b *bitReader) getBits(n uint8) uint16 { - if n == 0 || b.bitsRead >= 64 { - return 0 - } - return b.getBitsFast(n) -} - -// getBitsFast requires that at least one bit is requested every time. -// There are no checks if the buffer is filled. 
-func (b *bitReader) getBitsFast(n uint8) uint16 { - const regMask = 64 - 1 - v := uint16((b.value << (b.bitsRead & regMask)) >> ((regMask + 1 - n) & regMask)) - b.bitsRead += n - return v -} - // peekBitsFast requires that at least one bit is requested every time. // There are no checks if the buffer is filled. func (b *bitReader) peekBitsFast(n uint8) uint16 { @@ -71,21 +59,36 @@ func (b *bitReader) fillFast() { if b.bitsRead < 32 { return } - // Do single re-slice to avoid bounds checks. + + // 2 bounds checks. v := b.in[b.off-4 : b.off] + v = v[:4] low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) b.value = (b.value << 32) | uint64(low) b.bitsRead -= 32 b.off -= 4 } +func (b *bitReader) advance(n uint8) { + b.bitsRead += n +} + +// fillFastStart() assumes the bitreader is empty and there is at least 8 bytes to read. +func (b *bitReader) fillFastStart() { + // Do single re-slice to avoid bounds checks. + b.value = binary.LittleEndian.Uint64(b.in[b.off-8:]) + b.bitsRead = 0 + b.off -= 8 +} + // fill() will make sure at least 32 bits are available. func (b *bitReader) fill() { if b.bitsRead < 32 { return } if b.off > 4 { - v := b.in[b.off-4 : b.off] + v := b.in[b.off-4:] + v = v[:4] low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) b.value = (b.value << 32) | uint64(low) b.bitsRead -= 32 @@ -113,3 +116,214 @@ func (b *bitReader) close() error { } return nil } + +// bitReader reads a bitstream in reverse. +// The last set bit indicates the start of the stream and is used +// for aligning the input. +type bitReaderBytes struct { + in []byte + off uint // next byte to read is at in[off - 1] + value uint64 + bitsRead uint8 +} + +// init initializes and resets the bit reader. +func (b *bitReaderBytes) init(in []byte) error { + if len(in) < 1 { + return errors.New("corrupt stream: too short") + } + b.in = in + b.off = uint(len(in)) + // The highest bit of the last byte indicates where to start + v := in[len(in)-1] + if v == 0 { + return errors.New("corrupt stream, did not find end of stream") + } + b.bitsRead = 64 + b.value = 0 + if len(in) >= 8 { + b.fillFastStart() + } else { + b.fill() + b.fill() + } + b.advance(8 - uint8(highBit32(uint32(v)))) + return nil +} + +// peekBitsFast requires that at least one bit is requested every time. +// There are no checks if the buffer is filled. +func (b *bitReaderBytes) peekByteFast() uint8 { + got := uint8(b.value >> 56) + return got +} + +func (b *bitReaderBytes) advance(n uint8) { + b.bitsRead += n + b.value <<= n & 63 +} + +// fillFast() will make sure at least 32 bits are available. +// There must be at least 4 bytes available. +func (b *bitReaderBytes) fillFast() { + if b.bitsRead < 32 { + return + } + + // 2 bounds checks. + v := b.in[b.off-4 : b.off] + v = v[:4] + low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) + b.value |= uint64(low) << (b.bitsRead - 32) + b.bitsRead -= 32 + b.off -= 4 +} + +// fillFastStart() assumes the bitReaderBytes is empty and there is at least 8 bytes to read. +func (b *bitReaderBytes) fillFastStart() { + // Do single re-slice to avoid bounds checks. + b.value = binary.LittleEndian.Uint64(b.in[b.off-8:]) + b.bitsRead = 0 + b.off -= 8 +} + +// fill() will make sure at least 32 bits are available. 
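+// Input is consumed from the end of the buffer, since the stream is read
+// in reverse.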
+func (b *bitReaderBytes) fill() { + if b.bitsRead < 32 { + return + } + if b.off > 4 { + v := b.in[b.off-4:] + v = v[:4] + low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) + b.value |= uint64(low) << (b.bitsRead - 32) + b.bitsRead -= 32 + b.off -= 4 + return + } + for b.off > 0 { + b.value |= uint64(b.in[b.off-1]) << (b.bitsRead - 8) + b.bitsRead -= 8 + b.off-- + } +} + +// finished returns true if all bits have been read from the bit stream. +func (b *bitReaderBytes) finished() bool { + return b.off == 0 && b.bitsRead >= 64 +} + +// close the bitstream and returns an error if out-of-buffer reads occurred. +func (b *bitReaderBytes) close() error { + // Release reference. + b.in = nil + if b.bitsRead > 64 { + return io.ErrUnexpectedEOF + } + return nil +} + +// bitReaderShifted reads a bitstream in reverse. +// The last set bit indicates the start of the stream and is used +// for aligning the input. +type bitReaderShifted struct { + in []byte + off uint // next byte to read is at in[off - 1] + value uint64 + bitsRead uint8 +} + +// init initializes and resets the bit reader. +func (b *bitReaderShifted) init(in []byte) error { + if len(in) < 1 { + return errors.New("corrupt stream: too short") + } + b.in = in + b.off = uint(len(in)) + // The highest bit of the last byte indicates where to start + v := in[len(in)-1] + if v == 0 { + return errors.New("corrupt stream, did not find end of stream") + } + b.bitsRead = 64 + b.value = 0 + if len(in) >= 8 { + b.fillFastStart() + } else { + b.fill() + b.fill() + } + b.advance(8 - uint8(highBit32(uint32(v)))) + return nil +} + +// peekBitsFast requires that at least one bit is requested every time. +// There are no checks if the buffer is filled. +func (b *bitReaderShifted) peekBitsFast(n uint8) uint16 { + return uint16(b.value >> ((64 - n) & 63)) +} + +func (b *bitReaderShifted) advance(n uint8) { + b.bitsRead += n + b.value <<= n & 63 +} + +// fillFast() will make sure at least 32 bits are available. +// There must be at least 4 bytes available. +func (b *bitReaderShifted) fillFast() { + if b.bitsRead < 32 { + return + } + + // 2 bounds checks. + v := b.in[b.off-4 : b.off] + v = v[:4] + low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) + b.value |= uint64(low) << ((b.bitsRead - 32) & 63) + b.bitsRead -= 32 + b.off -= 4 +} + +// fillFastStart() assumes the bitReaderShifted is empty and there is at least 8 bytes to read. +func (b *bitReaderShifted) fillFastStart() { + // Do single re-slice to avoid bounds checks. + b.value = binary.LittleEndian.Uint64(b.in[b.off-8:]) + b.bitsRead = 0 + b.off -= 8 +} + +// fill() will make sure at least 32 bits are available. +func (b *bitReaderShifted) fill() { + if b.bitsRead < 32 { + return + } + if b.off > 4 { + v := b.in[b.off-4:] + v = v[:4] + low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) + b.value |= uint64(low) << ((b.bitsRead - 32) & 63) + b.bitsRead -= 32 + b.off -= 4 + return + } + for b.off > 0 { + b.value |= uint64(b.in[b.off-1]) << ((b.bitsRead - 8) & 63) + b.bitsRead -= 8 + b.off-- + } +} + +// finished returns true if all bits have been read from the bit stream. +func (b *bitReaderShifted) finished() bool { + return b.off == 0 && b.bitsRead >= 64 +} + +// close the bitstream and returns an error if out-of-buffer reads occurred. +func (b *bitReaderShifted) close() error { + // Release reference. 
+ b.in = nil + if b.bitsRead > 64 { + return io.ErrUnexpectedEOF + } + return nil +} diff --git a/redshiftsink/vendor/github.com/klauspost/compress/huff0/decompress.go b/redshiftsink/vendor/github.com/klauspost/compress/huff0/decompress.go index 97ae66a4a..a03b2634a 100644 --- a/redshiftsink/vendor/github.com/klauspost/compress/huff0/decompress.go +++ b/redshiftsink/vendor/github.com/klauspost/compress/huff0/decompress.go @@ -25,6 +25,9 @@ type dEntryDouble struct { len uint8 } +// Uses special code for all tables that are < 8 bits. +const use8BitTables = true + // ReadTable will read a table from the input. // The size of the input may be larger than the table definition. // Any content remaining after the table definition will be returned. @@ -83,6 +86,7 @@ func ReadTable(in []byte, s *Scratch) (s2 *Scratch, remain []byte, err error) { } v2 := v & 15 rankStats[v2]++ + // (1 << (v2-1)) is slower since the compiler cannot prove that v2 isn't 0. weightTotal += (1 << v2) >> 1 } if weightTotal == 0 { @@ -142,12 +146,14 @@ func ReadTable(in []byte, s *Scratch) (s2 *Scratch, remain []byte, err error) { d := dEntrySingle{ entry: uint16(s.actualTableLog+1-w) | (uint16(n) << 8), } - single := s.dt.single[rankStats[w] : rankStats[w]+length] + rank := &rankStats[w] + single := s.dt.single[*rank : *rank+length] for i := range single { single[i] = d } - rankStats[w] += length + *rank += length } + return s, in, nil } @@ -155,237 +161,905 @@ func ReadTable(in []byte, s *Scratch) (s2 *Scratch, remain []byte, err error) { // The length of the supplied input must match the end of a block exactly. // Before this is called, the table must be initialized with ReadTable unless // the encoder re-used the table. +// deprecated: Use the stateless Decoder() to get a concurrent version. func (s *Scratch) Decompress1X(in []byte) (out []byte, err error) { - if len(s.dt.single) == 0 { - return nil, errors.New("no table loaded") + if cap(s.Out) < s.MaxDecodedSize { + s.Out = make([]byte, s.MaxDecodedSize) } - var br bitReader - err = br.init(in) - if err != nil { - return nil, err + s.Out = s.Out[:0:s.MaxDecodedSize] + s.Out, err = s.Decoder().Decompress1X(s.Out, in) + return s.Out, err +} + +// Decompress4X will decompress a 4X encoded stream. +// Before this is called, the table must be initialized with ReadTable unless +// the encoder re-used the table. +// The length of the supplied input must match the end of a block exactly. +// The destination size of the uncompressed data must be known and provided. +// deprecated: Use the stateless Decoder() to get a concurrent version. +func (s *Scratch) Decompress4X(in []byte, dstSize int) (out []byte, err error) { + if dstSize > s.MaxDecodedSize { + return nil, ErrMaxDecodedSizeExceeded + } + if cap(s.Out) < dstSize { + s.Out = make([]byte, s.MaxDecodedSize) + } + s.Out = s.Out[:0:dstSize] + s.Out, err = s.Decoder().Decompress4X(s.Out, in) + return s.Out, err +} + +// Decoder will return a stateless decoder that can be used by multiple +// decompressors concurrently. +// Before this is called, the table must be initialized with ReadTable. +// The Decoder is still linked to the scratch buffer so that cannot be reused. +// However, it is safe to discard the scratch. +func (s *Scratch) Decoder() *Decoder { + return &Decoder{ + dt: s.dt, + actualTableLog: s.actualTableLog, } - s.Out = s.Out[:0] +} + +// Decoder provides stateless decoding. 
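+// It holds only the decoding table and table log; as long as the
+// originating Scratch is not reused to build new tables, a single Decoder
+// can safely be shared by many goroutines.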
+type Decoder struct { + dt dTable + actualTableLog uint8 +} - decode := func() byte { - val := br.peekBitsFast(s.actualTableLog) /* note : actualTableLog >= 1 */ - v := s.dt.single[val] - br.bitsRead += uint8(v.entry) - return uint8(v.entry >> 8) +// Decompress1X will decompress a 1X encoded stream. +// The cap of the output buffer will be the maximum decompressed size. +// The length of the supplied input must match the end of a block exactly. +func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) { + if len(d.dt.single) == 0 { + return nil, errors.New("no table loaded") + } + if use8BitTables && d.actualTableLog <= 8 { + return d.decompress1X8Bit(dst, src) } - hasDec := func(v dEntrySingle) byte { - br.bitsRead += uint8(v.entry) - return uint8(v.entry >> 8) + var br bitReaderShifted + err := br.init(src) + if err != nil { + return dst, err } + maxDecodedSize := cap(dst) + dst = dst[:0] // Avoid bounds check by always having full sized table. const tlSize = 1 << tableLogMax const tlMask = tlSize - 1 - dt := s.dt.single[:tlSize] + dt := d.dt.single[:tlSize] // Use temp table to avoid bound checks/append penalty. - var tmp = s.huffWeight[:256] + var buf [256]byte var off uint8 for br.off >= 8 { br.fillFast() - tmp[off+0] = hasDec(dt[br.peekBitsFast(s.actualTableLog)&tlMask]) - tmp[off+1] = hasDec(dt[br.peekBitsFast(s.actualTableLog)&tlMask]) + v := dt[br.peekBitsFast(d.actualTableLog)&tlMask] + br.advance(uint8(v.entry)) + buf[off+0] = uint8(v.entry >> 8) + + v = dt[br.peekBitsFast(d.actualTableLog)&tlMask] + br.advance(uint8(v.entry)) + buf[off+1] = uint8(v.entry >> 8) + + // Refill br.fillFast() - tmp[off+2] = hasDec(dt[br.peekBitsFast(s.actualTableLog)&tlMask]) - tmp[off+3] = hasDec(dt[br.peekBitsFast(s.actualTableLog)&tlMask]) + + v = dt[br.peekBitsFast(d.actualTableLog)&tlMask] + br.advance(uint8(v.entry)) + buf[off+2] = uint8(v.entry >> 8) + + v = dt[br.peekBitsFast(d.actualTableLog)&tlMask] + br.advance(uint8(v.entry)) + buf[off+3] = uint8(v.entry >> 8) + off += 4 if off == 0 { - if len(s.Out)+256 > s.MaxDecodedSize { + if len(dst)+256 > maxDecodedSize { br.close() return nil, ErrMaxDecodedSizeExceeded } - s.Out = append(s.Out, tmp...) + dst = append(dst, buf[:]...) } } - if len(s.Out)+int(off) > s.MaxDecodedSize { + if len(dst)+int(off) > maxDecodedSize { br.close() return nil, ErrMaxDecodedSizeExceeded } - s.Out = append(s.Out, tmp[:off]...) + dst = append(dst, buf[:off]...) - for !br.finished() { + // br < 8, so uint8 is fine + bitsLeft := uint8(br.off)*8 + 64 - br.bitsRead + for bitsLeft > 0 { br.fill() - if len(s.Out) >= s.MaxDecodedSize { + if false && br.bitsRead >= 32 { + if br.off >= 4 { + v := br.in[br.off-4:] + v = v[:4] + low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) + br.value = (br.value << 32) | uint64(low) + br.bitsRead -= 32 + br.off -= 4 + } else { + for br.off > 0 { + br.value = (br.value << 8) | uint64(br.in[br.off-1]) + br.bitsRead -= 8 + br.off-- + } + } + } + if len(dst) >= maxDecodedSize { + br.close() + return nil, ErrMaxDecodedSizeExceeded + } + v := d.dt.single[br.peekBitsFast(d.actualTableLog)&tlMask] + nBits := uint8(v.entry) + br.advance(nBits) + bitsLeft -= nBits + dst = append(dst, uint8(v.entry>>8)) + } + return dst, br.close() +} + +// decompress1X8Bit will decompress a 1X encoded stream with tablelog <= 8. +// The cap of the output buffer will be the maximum decompressed size. +// The length of the supplied input must match the end of a block exactly. 
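+// With a table log of 8 or less a whole byte can be peeked, so the table
+// index is derived with a single shift instead of a masked bit-level peek.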
+func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) { + if d.actualTableLog == 8 { + return d.decompress1X8BitExactly(dst, src) + } + var br bitReaderBytes + err := br.init(src) + if err != nil { + return dst, err + } + maxDecodedSize := cap(dst) + dst = dst[:0] + + // Avoid bounds check by always having full sized table. + dt := d.dt.single[:256] + + // Use temp table to avoid bound checks/append penalty. + var buf [256]byte + var off uint8 + + shift := (8 - d.actualTableLog) & 7 + + //fmt.Printf("mask: %b, tl:%d\n", mask, d.actualTableLog) + for br.off >= 4 { + br.fillFast() + v := dt[br.peekByteFast()>>shift] + br.advance(uint8(v.entry)) + buf[off+0] = uint8(v.entry >> 8) + + v = dt[br.peekByteFast()>>shift] + br.advance(uint8(v.entry)) + buf[off+1] = uint8(v.entry >> 8) + + v = dt[br.peekByteFast()>>shift] + br.advance(uint8(v.entry)) + buf[off+2] = uint8(v.entry >> 8) + + v = dt[br.peekByteFast()>>shift] + br.advance(uint8(v.entry)) + buf[off+3] = uint8(v.entry >> 8) + + off += 4 + if off == 0 { + if len(dst)+256 > maxDecodedSize { + br.close() + return nil, ErrMaxDecodedSizeExceeded + } + dst = append(dst, buf[:]...) + } + } + + if len(dst)+int(off) > maxDecodedSize { + br.close() + return nil, ErrMaxDecodedSizeExceeded + } + dst = append(dst, buf[:off]...) + + // br < 4, so uint8 is fine + bitsLeft := int8(uint8(br.off)*8 + (64 - br.bitsRead)) + for bitsLeft > 0 { + if br.bitsRead >= 64-8 { + for br.off > 0 { + br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8) + br.bitsRead -= 8 + br.off-- + } + } + if len(dst) >= maxDecodedSize { + br.close() + return nil, ErrMaxDecodedSizeExceeded + } + v := dt[br.peekByteFast()>>shift] + nBits := uint8(v.entry) + br.advance(nBits) + bitsLeft -= int8(nBits) + dst = append(dst, uint8(v.entry>>8)) + } + return dst, br.close() +} + +// decompress1X8Bit will decompress a 1X encoded stream with tablelog <= 8. +// The cap of the output buffer will be the maximum decompressed size. +// The length of the supplied input must match the end of a block exactly. +func (d *Decoder) decompress1X8BitExactly(dst, src []byte) ([]byte, error) { + var br bitReaderBytes + err := br.init(src) + if err != nil { + return dst, err + } + maxDecodedSize := cap(dst) + dst = dst[:0] + + // Avoid bounds check by always having full sized table. + dt := d.dt.single[:256] + + // Use temp table to avoid bound checks/append penalty. + var buf [256]byte + var off uint8 + + const shift = 0 + + //fmt.Printf("mask: %b, tl:%d\n", mask, d.actualTableLog) + for br.off >= 4 { + br.fillFast() + v := dt[br.peekByteFast()>>shift] + br.advance(uint8(v.entry)) + buf[off+0] = uint8(v.entry >> 8) + + v = dt[br.peekByteFast()>>shift] + br.advance(uint8(v.entry)) + buf[off+1] = uint8(v.entry >> 8) + + v = dt[br.peekByteFast()>>shift] + br.advance(uint8(v.entry)) + buf[off+2] = uint8(v.entry >> 8) + + v = dt[br.peekByteFast()>>shift] + br.advance(uint8(v.entry)) + buf[off+3] = uint8(v.entry >> 8) + + off += 4 + if off == 0 { + if len(dst)+256 > maxDecodedSize { + br.close() + return nil, ErrMaxDecodedSizeExceeded + } + dst = append(dst, buf[:]...) + } + } + + if len(dst)+int(off) > maxDecodedSize { + br.close() + return nil, ErrMaxDecodedSizeExceeded + } + dst = append(dst, buf[:off]...) 
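+
+	// The fast loop above needs at least 4 input bytes; whatever bits
+	// remain are decoded one symbol at a time below.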
+ + // br < 4, so uint8 is fine + bitsLeft := int8(uint8(br.off)*8 + (64 - br.bitsRead)) + for bitsLeft > 0 { + if br.bitsRead >= 64-8 { + for br.off > 0 { + br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8) + br.bitsRead -= 8 + br.off-- + } + } + if len(dst) >= maxDecodedSize { br.close() return nil, ErrMaxDecodedSizeExceeded } - s.Out = append(s.Out, decode()) + v := dt[br.peekByteFast()>>shift] + nBits := uint8(v.entry) + br.advance(nBits) + bitsLeft -= int8(nBits) + dst = append(dst, uint8(v.entry>>8)) } - return s.Out, br.close() + return dst, br.close() } // Decompress4X will decompress a 4X encoded stream. -// Before this is called, the table must be initialized with ReadTable unless -// the encoder re-used the table. // The length of the supplied input must match the end of a block exactly. -// The destination size of the uncompressed data must be known and provided. -func (s *Scratch) Decompress4X(in []byte, dstSize int) (out []byte, err error) { - if len(s.dt.single) == 0 { +// The *capacity* of the dst slice must match the destination size of +// the uncompressed data exactly. +func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) { + if len(d.dt.single) == 0 { return nil, errors.New("no table loaded") } - if len(in) < 6+(4*1) { + if len(src) < 6+(4*1) { return nil, errors.New("input too small") } - if dstSize > s.MaxDecodedSize { - return nil, ErrMaxDecodedSizeExceeded + if use8BitTables && d.actualTableLog <= 8 { + return d.decompress4X8bit(dst, src) } - // TODO: We do not detect when we overrun a buffer, except if the last one does. - var br [4]bitReader + var br [4]bitReaderShifted start := 6 for i := 0; i < 3; i++ { - length := int(in[i*2]) | (int(in[i*2+1]) << 8) - if start+length >= len(in) { + length := int(src[i*2]) | (int(src[i*2+1]) << 8) + if start+length >= len(src) { return nil, errors.New("truncated input (or invalid offset)") } - err = br[i].init(in[start : start+length]) + err := br[i].init(src[start : start+length]) if err != nil { return nil, err } start += length } - err = br[3].init(in[start:]) + err := br[3].init(src[start:]) if err != nil { return nil, err } - // Prepare output - if cap(s.Out) < dstSize { - s.Out = make([]byte, 0, dstSize) - } - s.Out = s.Out[:dstSize] // destination, offset to match first output - dstOut := s.Out + dstSize := cap(dst) + dst = dst[:dstSize] + out := dst dstEvery := (dstSize + 3) / 4 const tlSize = 1 << tableLogMax const tlMask = tlSize - 1 - single := s.dt.single[:tlSize] - - decode := func(br *bitReader) byte { - val := br.peekBitsFast(s.actualTableLog) /* note : actualTableLog >= 1 */ - v := single[val&tlMask] - br.bitsRead += uint8(v.entry) - return uint8(v.entry >> 8) - } + single := d.dt.single[:tlSize] // Use temp table to avoid bound checks/append penalty. - var tmp = s.huffWeight[:256] + var buf [256]byte var off uint8 var decoded int // Decode 2 values from each decoder/loop. 
const bufoff = 256 / 4 -bigloop: for { - for i := range br { - br := &br[i] - if br.off < 4 { - break bigloop - } - br.fillFast() + if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 { + break } { const stream = 0 - val := br[stream].peekBitsFast(s.actualTableLog) + const stream2 = 1 + br[stream].fillFast() + br[stream2].fillFast() + + val := br[stream].peekBitsFast(d.actualTableLog) v := single[val&tlMask] - br[stream].bitsRead += uint8(v.entry) + br[stream].advance(uint8(v.entry)) + buf[off+bufoff*stream] = uint8(v.entry >> 8) - val2 := br[stream].peekBitsFast(s.actualTableLog) + val2 := br[stream2].peekBitsFast(d.actualTableLog) v2 := single[val2&tlMask] - tmp[off+bufoff*stream+1] = uint8(v2.entry >> 8) - tmp[off+bufoff*stream] = uint8(v.entry >> 8) - br[stream].bitsRead += uint8(v2.entry) + br[stream2].advance(uint8(v2.entry)) + buf[off+bufoff*stream2] = uint8(v2.entry >> 8) + + val = br[stream].peekBitsFast(d.actualTableLog) + v = single[val&tlMask] + br[stream].advance(uint8(v.entry)) + buf[off+bufoff*stream+1] = uint8(v.entry >> 8) + + val2 = br[stream2].peekBitsFast(d.actualTableLog) + v2 = single[val2&tlMask] + br[stream2].advance(uint8(v2.entry)) + buf[off+bufoff*stream2+1] = uint8(v2.entry >> 8) } { - const stream = 1 - val := br[stream].peekBitsFast(s.actualTableLog) + const stream = 2 + const stream2 = 3 + br[stream].fillFast() + br[stream2].fillFast() + + val := br[stream].peekBitsFast(d.actualTableLog) v := single[val&tlMask] - br[stream].bitsRead += uint8(v.entry) + br[stream].advance(uint8(v.entry)) + buf[off+bufoff*stream] = uint8(v.entry >> 8) - val2 := br[stream].peekBitsFast(s.actualTableLog) + val2 := br[stream2].peekBitsFast(d.actualTableLog) v2 := single[val2&tlMask] - tmp[off+bufoff*stream+1] = uint8(v2.entry >> 8) - tmp[off+bufoff*stream] = uint8(v.entry >> 8) - br[stream].bitsRead += uint8(v2.entry) + br[stream2].advance(uint8(v2.entry)) + buf[off+bufoff*stream2] = uint8(v2.entry >> 8) + + val = br[stream].peekBitsFast(d.actualTableLog) + v = single[val&tlMask] + br[stream].advance(uint8(v.entry)) + buf[off+bufoff*stream+1] = uint8(v.entry >> 8) + + val2 = br[stream2].peekBitsFast(d.actualTableLog) + v2 = single[val2&tlMask] + br[stream2].advance(uint8(v2.entry)) + buf[off+bufoff*stream2+1] = uint8(v2.entry >> 8) + } + + off += 2 + + if off == bufoff { + if bufoff > dstEvery { + return nil, errors.New("corruption detected: stream overrun 1") + } + copy(out, buf[:bufoff]) + copy(out[dstEvery:], buf[bufoff:bufoff*2]) + copy(out[dstEvery*2:], buf[bufoff*2:bufoff*3]) + copy(out[dstEvery*3:], buf[bufoff*3:bufoff*4]) + off = 0 + out = out[bufoff:] + decoded += 256 + // There must at least be 3 buffers left. + if len(out) < dstEvery*3 { + return nil, errors.New("corruption detected: stream overrun 2") + } + } + } + if off > 0 { + ioff := int(off) + if len(out) < dstEvery*3+ioff { + return nil, errors.New("corruption detected: stream overrun 3") + } + copy(out, buf[:off]) + copy(out[dstEvery:dstEvery+ioff], buf[bufoff:bufoff*2]) + copy(out[dstEvery*2:dstEvery*2+ioff], buf[bufoff*2:bufoff*3]) + copy(out[dstEvery*3:dstEvery*3+ioff], buf[bufoff*3:bufoff*4]) + decoded += int(off) * 4 + out = out[off:] + } + + // Decode remaining. 
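+	// Each of the four bit streams is drained one symbol at a time into
+	// its own quarter of the output, starting at offset dstEvery*i.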
+ for i := range br { + offset := dstEvery * i + br := &br[i] + bitsLeft := br.off*8 + uint(64-br.bitsRead) + for bitsLeft > 0 { + br.fill() + if false && br.bitsRead >= 32 { + if br.off >= 4 { + v := br.in[br.off-4:] + v = v[:4] + low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) + br.value = (br.value << 32) | uint64(low) + br.bitsRead -= 32 + br.off -= 4 + } else { + for br.off > 0 { + br.value = (br.value << 8) | uint64(br.in[br.off-1]) + br.bitsRead -= 8 + br.off-- + } + } + } + // end inline... + if offset >= len(out) { + return nil, errors.New("corruption detected: stream overrun 4") + } + + // Read value and increment offset. + val := br.peekBitsFast(d.actualTableLog) + v := single[val&tlMask].entry + nBits := uint8(v) + br.advance(nBits) + bitsLeft -= uint(nBits) + out[offset] = uint8(v >> 8) + offset++ + } + decoded += offset - dstEvery*i + err = br.close() + if err != nil { + return nil, err + } + } + if dstSize != decoded { + return nil, errors.New("corruption detected: short output block") + } + return dst, nil +} + +// Decompress4X will decompress a 4X encoded stream. +// The length of the supplied input must match the end of a block exactly. +// The *capacity* of the dst slice must match the destination size of +// the uncompressed data exactly. +func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) { + if d.actualTableLog == 8 { + return d.decompress4X8bitExactly(dst, src) + } + + var br [4]bitReaderBytes + start := 6 + for i := 0; i < 3; i++ { + length := int(src[i*2]) | (int(src[i*2+1]) << 8) + if start+length >= len(src) { + return nil, errors.New("truncated input (or invalid offset)") + } + err := br[i].init(src[start : start+length]) + if err != nil { + return nil, err + } + start += length + } + err := br[3].init(src[start:]) + if err != nil { + return nil, err + } + + // destination, offset to match first output + dstSize := cap(dst) + dst = dst[:dstSize] + out := dst + dstEvery := (dstSize + 3) / 4 + + shift := (8 - d.actualTableLog) & 7 + + const tlSize = 1 << 8 + const tlMask = tlSize - 1 + single := d.dt.single[:tlSize] + + // Use temp table to avoid bound checks/append penalty. + var buf [256]byte + var off uint8 + var decoded int + + // Decode 4 values from each decoder/loop. + const bufoff = 256 / 4 + for { + if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 { + break + } + + { + // Interleave 2 decodes. 
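+			// Streams 0 and 1 are independent, so alternating their table
+			// lookups lets the CPU overlap the two dependency chains.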
+ const stream = 0 + const stream2 = 1 + br[stream].fillFast() + br[stream2].fillFast() + + v := single[br[stream].peekByteFast()>>shift].entry + buf[off+bufoff*stream] = uint8(v >> 8) + br[stream].advance(uint8(v)) + + v2 := single[br[stream2].peekByteFast()>>shift].entry + buf[off+bufoff*stream2] = uint8(v2 >> 8) + br[stream2].advance(uint8(v2)) + + v = single[br[stream].peekByteFast()>>shift].entry + buf[off+bufoff*stream+1] = uint8(v >> 8) + br[stream].advance(uint8(v)) + + v2 = single[br[stream2].peekByteFast()>>shift].entry + buf[off+bufoff*stream2+1] = uint8(v2 >> 8) + br[stream2].advance(uint8(v2)) + + v = single[br[stream].peekByteFast()>>shift].entry + buf[off+bufoff*stream+2] = uint8(v >> 8) + br[stream].advance(uint8(v)) + + v2 = single[br[stream2].peekByteFast()>>shift].entry + buf[off+bufoff*stream2+2] = uint8(v2 >> 8) + br[stream2].advance(uint8(v2)) + + v = single[br[stream].peekByteFast()>>shift].entry + buf[off+bufoff*stream+3] = uint8(v >> 8) + br[stream].advance(uint8(v)) + + v2 = single[br[stream2].peekByteFast()>>shift].entry + buf[off+bufoff*stream2+3] = uint8(v2 >> 8) + br[stream2].advance(uint8(v2)) } { const stream = 2 - val := br[stream].peekBitsFast(s.actualTableLog) - v := single[val&tlMask] - br[stream].bitsRead += uint8(v.entry) + const stream2 = 3 + br[stream].fillFast() + br[stream2].fillFast() - val2 := br[stream].peekBitsFast(s.actualTableLog) - v2 := single[val2&tlMask] - tmp[off+bufoff*stream+1] = uint8(v2.entry >> 8) - tmp[off+bufoff*stream] = uint8(v.entry >> 8) - br[stream].bitsRead += uint8(v2.entry) + v := single[br[stream].peekByteFast()>>shift].entry + buf[off+bufoff*stream] = uint8(v >> 8) + br[stream].advance(uint8(v)) + + v2 := single[br[stream2].peekByteFast()>>shift].entry + buf[off+bufoff*stream2] = uint8(v2 >> 8) + br[stream2].advance(uint8(v2)) + + v = single[br[stream].peekByteFast()>>shift].entry + buf[off+bufoff*stream+1] = uint8(v >> 8) + br[stream].advance(uint8(v)) + + v2 = single[br[stream2].peekByteFast()>>shift].entry + buf[off+bufoff*stream2+1] = uint8(v2 >> 8) + br[stream2].advance(uint8(v2)) + + v = single[br[stream].peekByteFast()>>shift].entry + buf[off+bufoff*stream+2] = uint8(v >> 8) + br[stream].advance(uint8(v)) + + v2 = single[br[stream2].peekByteFast()>>shift].entry + buf[off+bufoff*stream2+2] = uint8(v2 >> 8) + br[stream2].advance(uint8(v2)) + + v = single[br[stream].peekByteFast()>>shift].entry + buf[off+bufoff*stream+3] = uint8(v >> 8) + br[stream].advance(uint8(v)) + + v2 = single[br[stream2].peekByteFast()>>shift].entry + buf[off+bufoff*stream2+3] = uint8(v2 >> 8) + br[stream2].advance(uint8(v2)) + } + + off += 4 + + if off == bufoff { + if bufoff > dstEvery { + return nil, errors.New("corruption detected: stream overrun 1") + } + copy(out, buf[:bufoff]) + copy(out[dstEvery:], buf[bufoff:bufoff*2]) + copy(out[dstEvery*2:], buf[bufoff*2:bufoff*3]) + copy(out[dstEvery*3:], buf[bufoff*3:bufoff*4]) + off = 0 + out = out[bufoff:] + decoded += 256 + // There must at least be 3 buffers left. + if len(out) < dstEvery*3 { + return nil, errors.New("corruption detected: stream overrun 2") + } + } + } + if off > 0 { + ioff := int(off) + if len(out) < dstEvery*3+ioff { + return nil, errors.New("corruption detected: stream overrun 3") + } + copy(out, buf[:off]) + copy(out[dstEvery:dstEvery+ioff], buf[bufoff:bufoff*2]) + copy(out[dstEvery*2:dstEvery*2+ioff], buf[bufoff*2:bufoff*3]) + copy(out[dstEvery*3:dstEvery*3+ioff], buf[bufoff*3:bufoff*4]) + decoded += int(off) * 4 + out = out[off:] + } + + // Decode remaining. 
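+	// As in the bit-level decoder above, the tail of each stream is
+	// decoded one symbol at a time, refilling the buffer byte-wise once
+	// 8 or fewer valid bits remain.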
+ for i := range br { + offset := dstEvery * i + br := &br[i] + bitsLeft := int(br.off*8) + int(64-br.bitsRead) + for bitsLeft > 0 { + if br.finished() { + return nil, io.ErrUnexpectedEOF + } + if br.bitsRead >= 56 { + if br.off >= 4 { + v := br.in[br.off-4:] + v = v[:4] + low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) + br.value |= uint64(low) << (br.bitsRead - 32) + br.bitsRead -= 32 + br.off -= 4 + } else { + for br.off > 0 { + br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8) + br.bitsRead -= 8 + br.off-- + } + } + } + // end inline... + if offset >= len(out) { + return nil, errors.New("corruption detected: stream overrun 4") + } + + // Read value and increment offset. + v := single[br.peekByteFast()>>shift].entry + nBits := uint8(v) + br.advance(nBits) + bitsLeft -= int(nBits) + out[offset] = uint8(v >> 8) + offset++ + } + decoded += offset - dstEvery*i + err = br.close() + if err != nil { + return nil, err + } + } + if dstSize != decoded { + return nil, errors.New("corruption detected: short output block") + } + return dst, nil +} + +// Decompress4X will decompress a 4X encoded stream. +// The length of the supplied input must match the end of a block exactly. +// The *capacity* of the dst slice must match the destination size of +// the uncompressed data exactly. +func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) { + var br [4]bitReaderBytes + start := 6 + for i := 0; i < 3; i++ { + length := int(src[i*2]) | (int(src[i*2+1]) << 8) + if start+length >= len(src) { + return nil, errors.New("truncated input (or invalid offset)") + } + err := br[i].init(src[start : start+length]) + if err != nil { + return nil, err + } + start += length + } + err := br[3].init(src[start:]) + if err != nil { + return nil, err + } + + // destination, offset to match first output + dstSize := cap(dst) + dst = dst[:dstSize] + out := dst + dstEvery := (dstSize + 3) / 4 + + const shift = 0 + const tlSize = 1 << 8 + const tlMask = tlSize - 1 + single := d.dt.single[:tlSize] + + // Use temp table to avoid bound checks/append penalty. + var buf [256]byte + var off uint8 + var decoded int + + // Decode 4 values from each decoder/loop. + const bufoff = 256 / 4 + for { + if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 { + break } { - const stream = 3 - val := br[stream].peekBitsFast(s.actualTableLog) - v := single[val&tlMask] - br[stream].bitsRead += uint8(v.entry) + // Interleave 2 decodes. 
+ const stream = 0 + const stream2 = 1 + br[stream].fillFast() + br[stream2].fillFast() - val2 := br[stream].peekBitsFast(s.actualTableLog) - v2 := single[val2&tlMask] - tmp[off+bufoff*stream+1] = uint8(v2.entry >> 8) - tmp[off+bufoff*stream] = uint8(v.entry >> 8) - br[stream].bitsRead += uint8(v2.entry) + v := single[br[stream].peekByteFast()>>shift].entry + buf[off+bufoff*stream] = uint8(v >> 8) + br[stream].advance(uint8(v)) + + v2 := single[br[stream2].peekByteFast()>>shift].entry + buf[off+bufoff*stream2] = uint8(v2 >> 8) + br[stream2].advance(uint8(v2)) + + v = single[br[stream].peekByteFast()>>shift].entry + buf[off+bufoff*stream+1] = uint8(v >> 8) + br[stream].advance(uint8(v)) + + v2 = single[br[stream2].peekByteFast()>>shift].entry + buf[off+bufoff*stream2+1] = uint8(v2 >> 8) + br[stream2].advance(uint8(v2)) + + v = single[br[stream].peekByteFast()>>shift].entry + buf[off+bufoff*stream+2] = uint8(v >> 8) + br[stream].advance(uint8(v)) + + v2 = single[br[stream2].peekByteFast()>>shift].entry + buf[off+bufoff*stream2+2] = uint8(v2 >> 8) + br[stream2].advance(uint8(v2)) + + v = single[br[stream].peekByteFast()>>shift].entry + buf[off+bufoff*stream+3] = uint8(v >> 8) + br[stream].advance(uint8(v)) + + v2 = single[br[stream2].peekByteFast()>>shift].entry + buf[off+bufoff*stream2+3] = uint8(v2 >> 8) + br[stream2].advance(uint8(v2)) } - off += 2 + { + const stream = 2 + const stream2 = 3 + br[stream].fillFast() + br[stream2].fillFast() + + v := single[br[stream].peekByteFast()>>shift].entry + buf[off+bufoff*stream] = uint8(v >> 8) + br[stream].advance(uint8(v)) + + v2 := single[br[stream2].peekByteFast()>>shift].entry + buf[off+bufoff*stream2] = uint8(v2 >> 8) + br[stream2].advance(uint8(v2)) + + v = single[br[stream].peekByteFast()>>shift].entry + buf[off+bufoff*stream+1] = uint8(v >> 8) + br[stream].advance(uint8(v)) + + v2 = single[br[stream2].peekByteFast()>>shift].entry + buf[off+bufoff*stream2+1] = uint8(v2 >> 8) + br[stream2].advance(uint8(v2)) + + v = single[br[stream].peekByteFast()>>shift].entry + buf[off+bufoff*stream+2] = uint8(v >> 8) + br[stream].advance(uint8(v)) + + v2 = single[br[stream2].peekByteFast()>>shift].entry + buf[off+bufoff*stream2+2] = uint8(v2 >> 8) + br[stream2].advance(uint8(v2)) + + v = single[br[stream].peekByteFast()>>shift].entry + buf[off+bufoff*stream+3] = uint8(v >> 8) + br[stream].advance(uint8(v)) + + v2 = single[br[stream2].peekByteFast()>>shift].entry + buf[off+bufoff*stream2+3] = uint8(v2 >> 8) + br[stream2].advance(uint8(v2)) + } + + off += 4 if off == bufoff { if bufoff > dstEvery { return nil, errors.New("corruption detected: stream overrun 1") } - copy(dstOut, tmp[:bufoff]) - copy(dstOut[dstEvery:], tmp[bufoff:bufoff*2]) - copy(dstOut[dstEvery*2:], tmp[bufoff*2:bufoff*3]) - copy(dstOut[dstEvery*3:], tmp[bufoff*3:bufoff*4]) + copy(out, buf[:bufoff]) + copy(out[dstEvery:], buf[bufoff:bufoff*2]) + copy(out[dstEvery*2:], buf[bufoff*2:bufoff*3]) + copy(out[dstEvery*3:], buf[bufoff*3:bufoff*4]) off = 0 - dstOut = dstOut[bufoff:] + out = out[bufoff:] decoded += 256 // There must at least be 3 buffers left. 
- if len(dstOut) < dstEvery*3 { + if len(out) < dstEvery*3 { return nil, errors.New("corruption detected: stream overrun 2") } } } if off > 0 { ioff := int(off) - if len(dstOut) < dstEvery*3+ioff { + if len(out) < dstEvery*3+ioff { return nil, errors.New("corruption detected: stream overrun 3") } - copy(dstOut, tmp[:off]) - copy(dstOut[dstEvery:dstEvery+ioff], tmp[bufoff:bufoff*2]) - copy(dstOut[dstEvery*2:dstEvery*2+ioff], tmp[bufoff*2:bufoff*3]) - copy(dstOut[dstEvery*3:dstEvery*3+ioff], tmp[bufoff*3:bufoff*4]) + copy(out, buf[:off]) + copy(out[dstEvery:dstEvery+ioff], buf[bufoff:bufoff*2]) + copy(out[dstEvery*2:dstEvery*2+ioff], buf[bufoff*2:bufoff*3]) + copy(out[dstEvery*3:dstEvery*3+ioff], buf[bufoff*3:bufoff*4]) decoded += int(off) * 4 - dstOut = dstOut[off:] + out = out[off:] } // Decode remaining. for i := range br { offset := dstEvery * i br := &br[i] - for !br.finished() { - br.fill() - if offset >= len(dstOut) { + bitsLeft := int(br.off*8) + int(64-br.bitsRead) + for bitsLeft > 0 { + if br.finished() { + return nil, io.ErrUnexpectedEOF + } + if br.bitsRead >= 56 { + if br.off >= 4 { + v := br.in[br.off-4:] + v = v[:4] + low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) + br.value |= uint64(low) << (br.bitsRead - 32) + br.bitsRead -= 32 + br.off -= 4 + } else { + for br.off > 0 { + br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8) + br.bitsRead -= 8 + br.off-- + } + } + } + // end inline... + if offset >= len(out) { return nil, errors.New("corruption detected: stream overrun 4") } - dstOut[offset] = decode(br) + + // Read value and increment offset. + v := single[br.peekByteFast()>>shift].entry + nBits := uint8(v) + br.advance(nBits) + bitsLeft -= int(nBits) + out[offset] = uint8(v >> 8) offset++ } decoded += offset - dstEvery*i @@ -397,7 +1071,7 @@ bigloop: if dstSize != decoded { return nil, errors.New("corruption detected: short output block") } - return s.Out, nil + return dst, nil } // matches will compare a decoding table to a coding table. diff --git a/redshiftsink/vendor/github.com/klauspost/compress/huff0/huff0.go b/redshiftsink/vendor/github.com/klauspost/compress/huff0/huff0.go index 53249df05..177d6c4ea 100644 --- a/redshiftsink/vendor/github.com/klauspost/compress/huff0/huff0.go +++ b/redshiftsink/vendor/github.com/klauspost/compress/huff0/huff0.go @@ -79,6 +79,13 @@ type Scratch struct { // Slice of the returned data. OutData []byte + // MaxDecodedSize will set the maximum allowed output size. + // This value will automatically be set to BlockSizeMax if not set. + // Decoders will return ErrMaxDecodedSizeExceeded is this limit is exceeded. + MaxDecodedSize int + + br byteReader + // MaxSymbolValue will override the maximum symbol value of the next block. MaxSymbolValue uint8 @@ -95,12 +102,6 @@ type Scratch struct { // If WantLogLess == 0 any improvement will do. WantLogLess uint8 - // MaxDecodedSize will set the maximum allowed output size. - // This value will automatically be set to BlockSizeMax if not set. - // Decoders will return ErrMaxDecodedSizeExceeded is this limit is exceeded. - MaxDecodedSize int - - br byteReader symbolLen uint16 // Length of active part of the symbol table. 
maxCount int // count of the most probable symbol clearCount bool // clear count diff --git a/redshiftsink/vendor/github.com/klauspost/compress/zstd/README.md b/redshiftsink/vendor/github.com/klauspost/compress/zstd/README.md index bc977a302..ac3640dc9 100644 --- a/redshiftsink/vendor/github.com/klauspost/compress/zstd/README.md +++ b/redshiftsink/vendor/github.com/klauspost/compress/zstd/README.md @@ -5,11 +5,9 @@ It offers a very wide range of compression / speed trade-off, while being backed A high performance compression algorithm is implemented. For now focused on speed. This package provides [compression](#Compressor) to and [decompression](#Decompressor) of Zstandard content. -Note that custom dictionaries are not supported yet, so if your code relies on that, -you cannot use the package as-is. +Note that custom dictionaries are only supported for decompression. This package is pure Go and without use of "unsafe". -If a significant speedup can be achieved using "unsafe", it may be added as an option later. The `zstd` package is provided as open source software using a Go standard license. @@ -142,80 +140,96 @@ Using the Encoder for both a stream and individual blocks concurrently is safe. I have collected some speed examples to compare speed and compression against other compressors. * `file` is the input file. -* `out` is the compressor used. `zskp` is this package. `gzstd` is gzip standard library. `zstd` is the Datadog cgo library. +* `out` is the compressor used. `zskp` is this package. `zstd` is the Datadog cgo library. `gzstd/gzkp` is gzip standard and this library. * `level` is the compression level used. For `zskp` level 1 is "fastest", level 2 is "default". * `insize`/`outsize` is the input/output size. * `millis` is the number of milliseconds used for compression. * `mb/s` is megabytes (2^20 bytes) per second. ``` -The test data for the Large Text Compression Benchmark is the first -10^9 bytes of the English Wikipedia dump on Mar. 3, 2006. -http://mattmahoney.net/dc/textdata.html - -file out level insize outsize millis mb/s -enwik9 zskp 1 1000000000 343833033 5840 163.30 -enwik9 zskp 2 1000000000 317822183 8449 112.87 -enwik9 gzstd 1 1000000000 382578136 13627 69.98 -enwik9 gzstd 3 1000000000 349139651 22344 42.68 -enwik9 zstd 1 1000000000 357416379 4838 197.12 -enwik9 zstd 3 1000000000 313734522 7556 126.21 +Silesia Corpus: +http://sun.aei.polsl.pl/~sdeor/corpus/silesia.zip -GOB stream of binary data. Highly compressible. -https://files.klauspost.com/compress/gob-stream.7z +This package: +file out level insize outsize millis mb/s +silesia.tar zskp 1 211947520 73101992 643 313.87 +silesia.tar zskp 2 211947520 67504318 969 208.38 +silesia.tar zskp 3 211947520 65177448 1899 106.44 -file out level insize outsize millis mb/s -gob-stream zskp 1 1911399616 234981983 5100 357.42 -gob-stream zskp 2 1911399616 208674003 6698 272.15 -gob-stream gzstd 1 1911399616 357382641 14727 123.78 -gob-stream gzstd 3 1911399616 327835097 17005 107.19 -gob-stream zstd 1 1911399616 250787165 4075 447.22 -gob-stream zstd 3 1911399616 208191888 5511 330.77 - -Highly compressible JSON file. Similar to logs in a lot of ways. 
-https://files.klauspost.com/compress/adresser.001.gz - -file out level insize outsize millis mb/s -adresser.001 zskp 1 1073741824 18510122 1477 692.83 -adresser.001 zskp 2 1073741824 19831697 1705 600.59 -adresser.001 gzstd 1 1073741824 47755503 3079 332.47 -adresser.001 gzstd 3 1073741824 40052381 3051 335.63 -adresser.001 zstd 1 1073741824 16135896 994 1030.18 -adresser.001 zstd 3 1073741824 17794465 905 1131.49 +cgo zstd: +silesia.tar zstd 1 211947520 73605392 543 371.56 +silesia.tar zstd 3 211947520 66793289 864 233.68 +silesia.tar zstd 6 211947520 62916450 1913 105.66 -VM Image, Linux mint with a few installed applications: -https://files.klauspost.com/compress/rawstudio-mint14.7z +gzip, stdlib/this package: +silesia.tar gzstd 1 211947520 80007735 1654 122.21 +silesia.tar gzkp 1 211947520 80369488 1168 173.06 -file out level insize outsize millis mb/s -rawstudio-mint14.tar zskp 1 8558382592 3648168838 33398 244.38 -rawstudio-mint14.tar zskp 2 8558382592 3376721436 50962 160.16 -rawstudio-mint14.tar gzstd 1 8558382592 3926257486 84712 96.35 -rawstudio-mint14.tar gzstd 3 8558382592 3740711978 176344 46.28 -rawstudio-mint14.tar zstd 1 8558382592 3607859742 27903 292.51 -rawstudio-mint14.tar zstd 3 8558382592 3341710879 46700 174.77 +GOB stream of binary data. Highly compressible. +https://files.klauspost.com/compress/gob-stream.7z +file out level insize outsize millis mb/s +gob-stream zskp 1 1911399616 235022249 3088 590.30 +gob-stream zskp 2 1911399616 205669791 3786 481.34 +gob-stream zskp 3 1911399616 185792019 9324 195.48 +gob-stream zstd 1 1911399616 249810424 2637 691.26 +gob-stream zstd 3 1911399616 208192146 3490 522.31 +gob-stream zstd 6 1911399616 193632038 6687 272.56 +gob-stream gzstd 1 1911399616 357382641 10251 177.82 +gob-stream gzkp 1 1911399616 362156523 5695 320.08 -The test data is designed to test archivers in realistic backup scenarios. -http://mattmahoney.net/dc/10gb.html +The test data for the Large Text Compression Benchmark is the first +10^9 bytes of the English Wikipedia dump on Mar. 3, 2006. +http://mattmahoney.net/dc/textdata.html -file out level insize outsize millis mb/s -10gb.tar zskp 1 10065157632 4883149814 45715 209.97 -10gb.tar zskp 2 10065157632 4638110010 60970 157.44 -10gb.tar gzstd 1 10065157632 5198296126 97769 98.18 -10gb.tar gzstd 3 10065157632 4932665487 313427 30.63 -10gb.tar zstd 1 10065157632 4940796535 40391 237.65 -10gb.tar zstd 3 10065157632 4638618579 52911 181.42 +file out level insize outsize millis mb/s +enwik9 zskp 1 1000000000 343848582 3609 264.18 +enwik9 zskp 2 1000000000 317276632 5746 165.97 +enwik9 zskp 3 1000000000 294540704 11725 81.34 +enwik9 zstd 1 1000000000 358072021 3110 306.65 +enwik9 zstd 3 1000000000 313734672 4784 199.35 +enwik9 zstd 6 1000000000 295138875 10290 92.68 +enwik9 gzstd 1 1000000000 382578136 9604 99.30 +enwik9 gzkp 1 1000000000 383825945 6544 145.73 + +Highly compressible JSON file. 
+https://files.klauspost.com/compress/github-june-2days-2019.json.zst
+
+file out level insize outsize millis mb/s
+github-june-2days-2019.json zskp 1 6273951764 699045015 10620 563.40
+github-june-2days-2019.json zskp 2 6273951764 617881763 11687 511.96
+github-june-2days-2019.json zskp 3 6273951764 537511906 29252 204.54
+github-june-2days-2019.json zstd 1 6273951764 766284037 8450 708.00
+github-june-2days-2019.json zstd 3 6273951764 661889476 10927 547.57
+github-june-2days-2019.json zstd 6 6273951764 642756859 22996 260.18
+github-june-2days-2019.json gzstd 1 6273951764 1164400847 29948 199.79
+github-june-2days-2019.json gzkp 1 6273951764 1128755542 19236 311.03
 
-Silesia Corpus:
-http://sun.aei.polsl.pl/~sdeor/corpus/silesia.zip
+VM Image, Linux mint with a few installed applications:
+https://files.klauspost.com/compress/rawstudio-mint14.7z
 
-file out level insize outsize millis mb/s
-silesia.tar zskp 1 211947520 73025800 1108 182.26
-silesia.tar zskp 2 211947520 67674684 1599 126.41
-silesia.tar gzstd 1 211947520 80007735 2515 80.37
-silesia.tar gzstd 3 211947520 73133380 4259 47.45
-silesia.tar zstd 1 211947520 73513991 933 216.64
-silesia.tar zstd 3 211947520 66793301 1377 146.79
+file out level insize outsize millis mb/s
+rawstudio-mint14.tar zskp 1 8558382592 3667489370 20210 403.84
+rawstudio-mint14.tar zskp 2 8558382592 3364592300 31873 256.07
+rawstudio-mint14.tar zskp 3 8558382592 3224594213 71751 113.75
+rawstudio-mint14.tar zstd 1 8558382592 3609250104 17136 476.27
+rawstudio-mint14.tar zstd 3 8558382592 3341679997 29262 278.92
+rawstudio-mint14.tar zstd 6 8558382592 3235846406 77904 104.77
+rawstudio-mint14.tar gzstd 1 8558382592 3926257486 57722 141.40
+rawstudio-mint14.tar gzkp 1 8558382592 3970463184 41749 195.49
+
+CSV data:
+https://files.klauspost.com/compress/nyc-taxi-data-10M.csv.zst
+
+file out level insize outsize millis mb/s
+nyc-taxi-data-10M.csv zskp 1 3325605752 641339945 8925 355.35
+nyc-taxi-data-10M.csv zskp 2 3325605752 591748091 11268 281.44
+nyc-taxi-data-10M.csv zskp 3 3325605752 538490114 19880 159.53
+nyc-taxi-data-10M.csv zstd 1 3325605752 687399637 8233 385.18
+nyc-taxi-data-10M.csv zstd 3 3325605752 598514411 10065 315.07
+nyc-taxi-data-10M.csv zstd 6 3325605752 570522953 20038 158.27
+nyc-taxi-data-10M.csv gzstd 1 3325605752 928656485 23876 132.83
+nyc-taxi-data-10M.csv gzkp 1 3325605752 924718719 16388 193.53
 ```
 
 ### Converters
@@ -309,6 +323,20 @@ The decoder can be used for *concurrent* decompression of multiple buffers.
 It will only allow a certain number of concurrent operations to run.
 To tweak that yourself use the `WithDecoderConcurrency(n)` option when creating the decoder.
 
+### Dictionaries
+
+Data compressed with [dictionaries](https://github.com/facebook/zstd#the-case-for-small-data-compression) can be decompressed.
+
+Dictionaries are added individually to Decoders.
+Dictionaries are generated by the `zstd --train` command and contain an initial state for the decoder.
+To add a dictionary use the `WithDecoderDicts(dicts ...[]byte)` option with the dictionary data.
+Several dictionaries can be added at once.
+
+A dictionary will be used automatically for the data that specifies it.
+A re-used Decoder will retain the registered dictionaries.
+
+When registering multiple dictionaries with the same ID, the last one will be used.
+
 ### Allocation-less operation
 
 The decoder has been designed to operate without allocations after a warmup.
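The Dictionaries section added in the hunk above only names the option, so here is a minimal sketch of dictionary decompression, assuming `my.dict` was produced by `zstd --train` and `payload.zst` was compressed against it (both file names are placeholders):

```Go
package main

import (
	"fmt"
	"io/ioutil"
	"log"

	"github.com/klauspost/compress/zstd"
)

func main() {
	// Placeholder inputs: a dictionary from `zstd --train` and a frame
	// compressed with it.
	dict, err := ioutil.ReadFile("my.dict")
	if err != nil {
		log.Fatal(err)
	}
	payload, err := ioutil.ReadFile("payload.zst")
	if err != nil {
		log.Fatal(err)
	}

	// Register the dictionary when the decoder is created; frames that
	// reference its ID will pick it up automatically.
	dec, err := zstd.NewReader(nil, zstd.WithDecoderDicts(dict))
	if err != nil {
		log.Fatal(err)
	}
	defer dec.Close()

	out, err := dec.DecodeAll(payload, nil)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("decoded %d bytes\n", len(out))
}
```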
@@ -350,36 +378,42 @@ These are some examples of performance compared to [datadog cgo library](https:/ The first two are streaming decodes and the last are smaller inputs. ``` -BenchmarkDecoderSilesia-8 20 642550210 ns/op 329.85 MB/s 3101 B/op 8 allocs/op -BenchmarkDecoderSilesiaCgo-8 100 384930000 ns/op 550.61 MB/s 451878 B/op 9713 allocs/op - -BenchmarkDecoderEnwik9-2 10 3146000080 ns/op 317.86 MB/s 2649 B/op 9 allocs/op -BenchmarkDecoderEnwik9Cgo-2 20 1905900000 ns/op 524.69 MB/s 1125120 B/op 45785 allocs/op - -BenchmarkDecoder_DecodeAll/z000000.zst-8 200 7049994 ns/op 138.26 MB/s 40 B/op 2 allocs/op -BenchmarkDecoder_DecodeAll/z000001.zst-8 100000 19560 ns/op 97.49 MB/s 40 B/op 2 allocs/op -BenchmarkDecoder_DecodeAll/z000002.zst-8 5000 297599 ns/op 236.99 MB/s 40 B/op 2 allocs/op -BenchmarkDecoder_DecodeAll/z000003.zst-8 2000 725502 ns/op 141.17 MB/s 40 B/op 2 allocs/op -BenchmarkDecoder_DecodeAll/z000004.zst-8 200000 9314 ns/op 54.54 MB/s 40 B/op 2 allocs/op -BenchmarkDecoder_DecodeAll/z000005.zst-8 10000 137500 ns/op 104.72 MB/s 40 B/op 2 allocs/op -BenchmarkDecoder_DecodeAll/z000006.zst-8 500 2316009 ns/op 206.06 MB/s 40 B/op 2 allocs/op -BenchmarkDecoder_DecodeAll/z000007.zst-8 20000 64499 ns/op 344.90 MB/s 40 B/op 2 allocs/op -BenchmarkDecoder_DecodeAll/z000008.zst-8 50000 24900 ns/op 219.56 MB/s 40 B/op 2 allocs/op -BenchmarkDecoder_DecodeAll/z000009.zst-8 1000 2348999 ns/op 154.01 MB/s 40 B/op 2 allocs/op - -BenchmarkDecoder_DecodeAllCgo/z000000.zst-8 500 4268005 ns/op 228.38 MB/s 1228849 B/op 3 allocs/op -BenchmarkDecoder_DecodeAllCgo/z000001.zst-8 100000 15250 ns/op 125.05 MB/s 2096 B/op 3 allocs/op -BenchmarkDecoder_DecodeAllCgo/z000002.zst-8 10000 147399 ns/op 478.49 MB/s 73776 B/op 3 allocs/op -BenchmarkDecoder_DecodeAllCgo/z000003.zst-8 5000 320798 ns/op 319.27 MB/s 139312 B/op 3 allocs/op -BenchmarkDecoder_DecodeAllCgo/z000004.zst-8 200000 10004 ns/op 50.77 MB/s 560 B/op 3 allocs/op -BenchmarkDecoder_DecodeAllCgo/z000005.zst-8 20000 73599 ns/op 195.64 MB/s 19120 B/op 3 allocs/op -BenchmarkDecoder_DecodeAllCgo/z000006.zst-8 1000 1119003 ns/op 426.48 MB/s 557104 B/op 3 allocs/op -BenchmarkDecoder_DecodeAllCgo/z000007.zst-8 20000 103450 ns/op 215.04 MB/s 71296 B/op 9 allocs/op -BenchmarkDecoder_DecodeAllCgo/z000008.zst-8 100000 20130 ns/op 271.58 MB/s 6192 B/op 3 allocs/op -BenchmarkDecoder_DecodeAllCgo/z000009.zst-8 2000 1123500 ns/op 322.00 MB/s 368688 B/op 3 allocs/op +BenchmarkDecoderSilesia-8 3 385000067 ns/op 550.51 MB/s 5498 B/op 8 allocs/op +BenchmarkDecoderSilesiaCgo-8 6 197666567 ns/op 1072.25 MB/s 270672 B/op 8 allocs/op + +BenchmarkDecoderEnwik9-8 1 2027001600 ns/op 493.34 MB/s 10496 B/op 18 allocs/op +BenchmarkDecoderEnwik9Cgo-8 2 979499200 ns/op 1020.93 MB/s 270672 B/op 8 allocs/op + +Concurrent performance: + +BenchmarkDecoder_DecodeAllParallel/kppkn.gtb.zst-16 28915 42469 ns/op 4340.07 MB/s 114 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/geo.protodata.zst-16 116505 9965 ns/op 11900.16 MB/s 16 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/plrabn12.txt.zst-16 8952 134272 ns/op 3588.70 MB/s 915 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/lcet10.txt.zst-16 11820 102538 ns/op 4161.90 MB/s 594 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/asyoulik.txt.zst-16 34782 34184 ns/op 3661.88 MB/s 60 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/alice29.txt.zst-16 27712 43447 ns/op 3500.58 MB/s 99 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/html_x_4.zst-16 62826 18750 ns/op 21845.10 MB/s 104 B/op 0 allocs/op 
+BenchmarkDecoder_DecodeAllParallel/paper-100k.pdf.zst-16 631545 1794 ns/op 57078.74 MB/s 2 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/fireworks.jpeg.zst-16 1690140 712 ns/op 172938.13 MB/s 1 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/urls.10K.zst-16 10432 113593 ns/op 6180.73 MB/s 1143 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/html.zst-16 113206 10671 ns/op 9596.27 MB/s 15 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/comp-data.bin.zst-16 1530615 779 ns/op 5229.49 MB/s 0 B/op 0 allocs/op + +BenchmarkDecoder_DecodeAllParallelCgo/kppkn.gtb.zst-16 65217 16192 ns/op 11383.34 MB/s 46 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallelCgo/geo.protodata.zst-16 292671 4039 ns/op 29363.19 MB/s 6 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallelCgo/plrabn12.txt.zst-16 26314 46021 ns/op 10470.43 MB/s 293 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallelCgo/lcet10.txt.zst-16 33897 34900 ns/op 12227.96 MB/s 205 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallelCgo/asyoulik.txt.zst-16 104348 11433 ns/op 10949.01 MB/s 20 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallelCgo/alice29.txt.zst-16 75949 15510 ns/op 9805.60 MB/s 32 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallelCgo/html_x_4.zst-16 173910 6756 ns/op 60624.29 MB/s 37 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallelCgo/paper-100k.pdf.zst-16 923076 1339 ns/op 76474.87 MB/s 1 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallelCgo/fireworks.jpeg.zst-16 922920 1351 ns/op 91102.57 MB/s 2 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallelCgo/urls.10K.zst-16 27649 43618 ns/op 16096.19 MB/s 407 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallelCgo/html.zst-16 279073 4160 ns/op 24614.18 MB/s 6 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallelCgo/comp-data.bin.zst-16 749938 1579 ns/op 2581.71 MB/s 0 B/op 0 allocs/op ``` -This reflects the performance around May 2019, but this may be out of date. +This reflects the performance around May 2020, but this may be out of date. # Contributions diff --git a/redshiftsink/vendor/github.com/klauspost/compress/zstd/bitreader.go b/redshiftsink/vendor/github.com/klauspost/compress/zstd/bitreader.go index 15d79d439..854458537 100644 --- a/redshiftsink/vendor/github.com/klauspost/compress/zstd/bitreader.go +++ b/redshiftsink/vendor/github.com/klauspost/compress/zstd/bitreader.go @@ -5,6 +5,7 @@ package zstd import ( + "encoding/binary" "errors" "io" "math/bits" @@ -34,8 +35,12 @@ func (b *bitReader) init(in []byte) error { } b.bitsRead = 64 b.value = 0 - b.fill() - b.fill() + if len(in) >= 8 { + b.fillFastStart() + } else { + b.fill() + b.fill() + } b.bitsRead += 8 - uint8(highBits(uint32(v))) return nil } @@ -63,21 +68,31 @@ func (b *bitReader) fillFast() { if b.bitsRead < 32 { return } - // Do single re-slice to avoid bounds checks. - v := b.in[b.off-4 : b.off] + // 2 bounds checks. + v := b.in[b.off-4:] + v = v[:4] low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) b.value = (b.value << 32) | uint64(low) b.bitsRead -= 32 b.off -= 4 } +// fillFastStart() assumes the bitreader is empty and there is at least 8 bytes to read. +func (b *bitReader) fillFastStart() { + // Do single re-slice to avoid bounds checks. + b.value = binary.LittleEndian.Uint64(b.in[b.off-8:]) + b.bitsRead = 0 + b.off -= 8 +} + // fill() will make sure at least 32 bits are available. 
func (b *bitReader) fill() { if b.bitsRead < 32 { return } if b.off >= 4 { - v := b.in[b.off-4 : b.off] + v := b.in[b.off-4:] + v = v[:4] low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) b.value = (b.value << 32) | uint64(low) b.bitsRead -= 32 diff --git a/redshiftsink/vendor/github.com/klauspost/compress/zstd/blockdec.go b/redshiftsink/vendor/github.com/klauspost/compress/zstd/blockdec.go index ed670bcc7..c8ec6e331 100644 --- a/redshiftsink/vendor/github.com/klauspost/compress/zstd/blockdec.go +++ b/redshiftsink/vendor/github.com/klauspost/compress/zstd/blockdec.go @@ -75,21 +75,29 @@ type blockDec struct { // Window size of the block. WindowSize uint64 - Type blockType - RLESize uint32 - // Is this the last block of a frame? - Last bool - - // Use less memory - lowMem bool history chan *history input chan struct{} result chan decodeOutput sequenceBuf []seq - tmp [4]byte err error decWG sync.WaitGroup + + // Frame to use for singlethreaded decoding. + // Should not be used by the decoder itself since parent may be another frame. + localFrame *frameDec + + // Block is RLE, this is the size. + RLESize uint32 + tmp [4]byte + + Type blockType + + // Is this the last block of a frame? + Last bool + + // Use less memory + lowMem bool } func (b *blockDec) String() string { @@ -127,25 +135,37 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error { b.Type = blockType((bh >> 1) & 3) // find size. cSize := int(bh >> 3) + maxSize := maxBlockSize switch b.Type { case blockTypeReserved: return ErrReservedBlockType case blockTypeRLE: b.RLESize = uint32(cSize) + if b.lowMem { + maxSize = cSize + } cSize = 1 case blockTypeCompressed: if debug { println("Data size on stream:", cSize) } b.RLESize = 0 + maxSize = maxCompressedBlockSize + if windowSize < maxCompressedBlockSize && b.lowMem { + maxSize = int(windowSize) + } if cSize > maxCompressedBlockSize || uint64(cSize) > b.WindowSize { if debug { printf("compressed block too big: csize:%d block: %+v\n", uint64(cSize), b) } return ErrCompressedSizeTooBig } - default: + case blockTypeRaw: b.RLESize = 0 + // We do not need a destination for raw blocks. + maxSize = -1 + default: + panic("Invalid block type") } // Read block data. @@ -156,8 +176,8 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error { b.dataStorage = make([]byte, 0, maxBlockSize) } } - if cap(b.dst) <= maxBlockSize { - b.dst = make([]byte, 0, maxBlockSize+1) + if cap(b.dst) <= maxSize { + b.dst = make([]byte, 0, maxSize+1) } var err error b.data, err = br.readBig(cSize, b.dataStorage) @@ -445,26 +465,22 @@ func (b *blockDec) decodeCompressed(hist *history) error { if huff == nil { huff = &huff0.Scratch{} } - huff.Out = b.literalBuf[:0] huff, literals, err = huff0.ReadTable(literals, huff) if err != nil { println("reading huffman table:", err) return err } // Use our out buffer. 
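The `v := b.in[b.off-4:]` / `v = v[:4]` pattern used in `fill` and `fillFast` above swaps the old three-index re-slice for two cheap bounds checks and gives the compiler a constant-length slice, so the four byte loads need no further checking. A standalone sketch of the idiom (the function name is illustrative, not from the package):

```go
package main

import "fmt"

// readLE32 decodes the little-endian uint32 ending at off. The two-step
// re-slice yields a slice of known length 4, letting the compiler drop
// the bounds checks on v[0]..v[3].
func readLE32(in []byte, off int) uint32 {
	v := in[off-4:]
	v = v[:4]
	return uint32(v[0]) | uint32(v[1])<<8 | uint32(v[2])<<16 | uint32(v[3])<<24
}

func main() {
	fmt.Printf("%#x\n", readLE32([]byte{0x78, 0x56, 0x34, 0x12}, 4)) // 0x12345678
}
```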
- huff.Out = b.literalBuf[:0] - huff.MaxDecodedSize = litRegenSize if fourStreams { - literals, err = huff.Decompress4X(literals, litRegenSize) + literals, err = huff.Decoder().Decompress4X(b.literalBuf[:0:litRegenSize], literals) } else { - literals, err = huff.Decompress1X(literals) + literals, err = huff.Decoder().Decompress1X(b.literalBuf[:0:litRegenSize], literals) } if err != nil { println("decoding compressed literals:", err) return err } // Make sure we don't leak our literals buffer - huff.Out = nil if len(literals) != litRegenSize { return fmt.Errorf("literal output size mismatch want %d, got %d", litRegenSize, len(literals)) } @@ -615,15 +631,12 @@ func (b *blockDec) decodeCompressed(hist *history) error { var err error // Use our out buffer. huff = hist.huffTree - huff.Out = b.literalBuf[:0] - huff.MaxDecodedSize = litRegenSize if fourStreams { - literals, err = huff.Decompress4X(literals, litRegenSize) + literals, err = huff.Decoder().Decompress4X(b.literalBuf[:0:litRegenSize], literals) } else { - literals, err = huff.Decompress1X(literals) + literals, err = huff.Decoder().Decompress1X(b.literalBuf[:0:litRegenSize], literals) } // Make sure we don't leak our literals buffer - huff.Out = nil if err != nil { println("decompressing literals:", err) return err @@ -633,12 +646,13 @@ func (b *blockDec) decodeCompressed(hist *history) error { } } else { if hist.huffTree != nil && huff != nil { - huffDecoderPool.Put(hist.huffTree) + if hist.dict == nil || hist.dict.litDec != hist.huffTree { + huffDecoderPool.Put(hist.huffTree) + } hist.huffTree = nil } } if huff != nil { - huff.Out = nil hist.huffTree = huff } if debug { @@ -671,12 +685,21 @@ func (b *blockDec) decodeCompressed(hist *history) error { // If only recent offsets were not transferred, this would be an obvious win. // Also, if first 3 sequences don't reference recent offsets, all sequences can be decoded. + hbytes := hist.b + if len(hbytes) > hist.windowSize { + hbytes = hbytes[len(hbytes)-hist.windowSize:] + // We do not need history any more. + if hist.dict != nil { + hist.dict.content = nil + } + } + if err := seqs.initialize(br, hist, literals, b.dst); err != nil { println("initializing sequences:", err) return err } - err = seqs.decode(nSeqs, br, hist.b) + err = seqs.decode(nSeqs, br, hbytes) if err != nil { return err } diff --git a/redshiftsink/vendor/github.com/klauspost/compress/zstd/blockenc.go b/redshiftsink/vendor/github.com/klauspost/compress/zstd/blockenc.go index 507757d52..c584f6aab 100644 --- a/redshiftsink/vendor/github.com/klauspost/compress/zstd/blockenc.go +++ b/redshiftsink/vendor/github.com/klauspost/compress/zstd/blockenc.go @@ -444,9 +444,9 @@ func fuzzFseEncoder(data []byte) int { } // encode will encode the block and append the output in b.output. 
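The literal decoding above now passes `b.literalBuf[:0:litRegenSize]` to `Decoder().Decompress1X`/`Decompress4X` instead of configuring `huff.Out` and `huff.MaxDecodedSize`: the full slice expression hands the decoder a destination whose capacity is the output limit, so nothing leaks through a shared field. A minimal sketch of the capped-destination idiom (`appendUpTo` is a hypothetical helper):

```go
package main

import "fmt"

// appendUpTo appends src to dst but refuses to grow past dst's capacity,
// mimicking how a capped destination bounds decoder output.
func appendUpTo(dst, src []byte) ([]byte, error) {
	if len(dst)+len(src) > cap(dst) {
		return nil, fmt.Errorf("output would exceed %d bytes", cap(dst))
	}
	return append(dst, src...), nil
}

func main() {
	buf := make([]byte, 16) // backing storage
	dst := buf[:0:8]        // length 0, capacity capped at 8
	out, err := appendUpTo(dst, []byte("12345678"))
	fmt.Println(len(out), err) // 8 <nil>
	_, err = appendUpTo(out, []byte("9"))
	fmt.Println(err) // output would exceed 8 bytes
}
```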
-func (b *blockEnc) encode(raw bool) error { +func (b *blockEnc) encode(raw, rawAllLits bool) error { if len(b.sequences) == 0 { - return b.encodeLits(raw) + return b.encodeLits(rawAllLits) } // We want some difference if len(b.literals) > (b.size - (b.size >> 5)) { @@ -806,7 +806,7 @@ func (b *blockEnc) genCodes() { mlH[v]++ if v > mlMax { mlMax = v - if debug && mlMax > maxMatchLengthSymbol { + if debugAsserts && mlMax > maxMatchLengthSymbol { panic(fmt.Errorf("mlMax > maxMatchLengthSymbol (%d), matchlen: %d", mlMax, seq.matchLen)) } } @@ -821,13 +821,13 @@ func (b *blockEnc) genCodes() { } return int(max) } - if mlMax > maxMatchLengthSymbol { + if debugAsserts && mlMax > maxMatchLengthSymbol { panic(fmt.Errorf("mlMax > maxMatchLengthSymbol (%d)", mlMax)) } - if ofMax > maxOffsetBits { + if debugAsserts && ofMax > maxOffsetBits { panic(fmt.Errorf("ofMax > maxOffsetBits (%d)", ofMax)) } - if llMax > maxLiteralLengthSymbol { + if debugAsserts && llMax > maxLiteralLengthSymbol { panic(fmt.Errorf("llMax > maxLiteralLengthSymbol (%d)", llMax)) } diff --git a/redshiftsink/vendor/github.com/klauspost/compress/zstd/bytebuf.go b/redshiftsink/vendor/github.com/klauspost/compress/zstd/bytebuf.go index 07321acb1..658ef7838 100644 --- a/redshiftsink/vendor/github.com/klauspost/compress/zstd/bytebuf.go +++ b/redshiftsink/vendor/github.com/klauspost/compress/zstd/bytebuf.go @@ -30,7 +30,7 @@ type byteBuffer interface { type byteBuf []byte func (b *byteBuf) readSmall(n int) []byte { - if debug && n > 8 { + if debugAsserts && n > 8 { panic(fmt.Errorf("small read > 8 (%d). use readBig", n)) } bb := *b @@ -82,7 +82,7 @@ type readerWrapper struct { } func (r *readerWrapper) readSmall(n int) []byte { - if debug && n > 8 { + if debugAsserts && n > 8 { panic(fmt.Errorf("small read > 8 (%d). use readBig", n)) } n2, err := io.ReadFull(r.r, r.tmp[:n]) diff --git a/redshiftsink/vendor/github.com/klauspost/compress/zstd/bytereader.go b/redshiftsink/vendor/github.com/klauspost/compress/zstd/bytereader.go index dc4378b64..2c4fca17f 100644 --- a/redshiftsink/vendor/github.com/klauspost/compress/zstd/bytereader.go +++ b/redshiftsink/vendor/github.com/klauspost/compress/zstd/bytereader.go @@ -31,7 +31,8 @@ func (b *byteReader) overread() bool { // Int32 returns a little endian int32 starting at current offset. func (b byteReader) Int32() int32 { - b2 := b.b[b.off : b.off+4 : b.off+4] + b2 := b.b[b.off:] + b2 = b2[:4] v3 := int32(b2[3]) v2 := int32(b2[2]) v1 := int32(b2[1]) @@ -55,7 +56,20 @@ func (b byteReader) Uint32() uint32 { } return v } - b2 := b.b[b.off : b.off+4 : b.off+4] + b2 := b.b[b.off:] + b2 = b2[:4] + v3 := uint32(b2[3]) + v2 := uint32(b2[2]) + v1 := uint32(b2[1]) + v0 := uint32(b2[0]) + return v0 | (v1 << 8) | (v2 << 16) | (v3 << 24) +} + +// Uint32NC returns a little endian uint32 starting at current offset. +// The caller must be sure if there are at least 4 bytes left. +func (b byteReader) Uint32NC() uint32 { + b2 := b.b[b.off:] + b2 = b2[:4] v3 := uint32(b2[3]) v2 := uint32(b2[2]) v1 := uint32(b2[1]) diff --git a/redshiftsink/vendor/github.com/klauspost/compress/zstd/decoder.go b/redshiftsink/vendor/github.com/klauspost/compress/zstd/decoder.go index 35a3cda91..66b51bf2d 100644 --- a/redshiftsink/vendor/github.com/klauspost/compress/zstd/decoder.go +++ b/redshiftsink/vendor/github.com/klauspost/compress/zstd/decoder.go @@ -23,17 +23,15 @@ type Decoder struct { // Unreferenced decoders, ready for use. decoders chan *blockDec - // Unreferenced decoders, ready for use. 
- frames chan *frameDec - // Streams ready to be decoded. stream chan decodeStream // Current read position used for Reader functionality. current decoderState - // Custom dictionaries - dicts map[uint32]struct{} + // Custom dictionaries. + // Always uses copies. + dicts map[uint32]dict // streamWg is the waitgroup for all streams streamWg sync.WaitGroup @@ -66,7 +64,7 @@ var ( // A Decoder can be used in two modes: // // 1) As a stream, or -// 2) For stateless decoding using DecodeAll or DecodeBuffer. +// 2) For stateless decoding using DecodeAll. // // Only a single stream can be decoded concurrently, but the same decoder // can run multiple concurrent stateless decodes. It is even possible to @@ -87,12 +85,19 @@ func NewReader(r io.Reader, opts ...DOption) (*Decoder, error) { d.current.output = make(chan decodeOutput, d.o.concurrent) d.current.flushed = true + // Transfer option dicts. + d.dicts = make(map[uint32]dict, len(d.o.dicts)) + for _, dc := range d.o.dicts { + d.dicts[dc.id] = dc + } + d.o.dicts = nil + // Create decoders d.decoders = make(chan *blockDec, d.o.concurrent) - d.frames = make(chan *frameDec, d.o.concurrent) for i := 0; i < d.o.concurrent; i++ { - d.frames <- newFrameDec(d.o) - d.decoders <- newBlockDec(d.o.lowMem) + dec := newBlockDec(d.o.lowMem) + dec.localFrame = newFrameDec(d.o) + d.decoders <- dec } if r == nil { @@ -169,7 +174,12 @@ func (d *Decoder) Reset(r io.Reader) error { println("*bytes.Buffer detected, doing sync decode, len:", bb.Len()) } b := bb.Bytes() - dst, err := d.DecodeAll(b, nil) + var dst []byte + if cap(d.current.b) > 0 { + dst = d.current.b + } + + dst, err := d.DecodeAll(b, dst[:0]) if err == nil { err = io.EOF } @@ -277,23 +287,31 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) { } // Grab a block decoder and frame decoder. 
- block, frame := <-d.decoders, <-d.frames + block := <-d.decoders + frame := block.localFrame defer func() { if debug { printf("re-adding decoder: %p", block) } - d.decoders <- block frame.rawInput = nil frame.bBuf = nil - d.frames <- frame + d.decoders <- block }() frame.bBuf = input for { + frame.history.reset() err := frame.reset(&frame.bBuf) if err == io.EOF { return dst, nil } + if frame.DictionaryID != nil { + dict, ok := d.dicts[*frame.DictionaryID] + if !ok { + return nil, ErrUnknownDictionary + } + frame.history.setDict(&dict) + } if err != nil { return dst, err } @@ -315,7 +333,7 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) { if size > 1<<20 { size = 1 << 20 } - dst = make([]byte, 0, frame.WindowSize) + dst = make([]byte, 0, size) } dst, err = frame.runDecoder(dst, block) @@ -456,10 +474,19 @@ func (d *Decoder) startStreamDecoder(inStream chan decodeStream) { br := readerWrapper{r: stream.r} decodeStream: for { + frame.history.reset() err := frame.reset(&br) if debug && err != nil { println("Frame decoder returned", err) } + if err == nil && frame.DictionaryID != nil { + dict, ok := d.dicts[*frame.DictionaryID] + if !ok { + err = ErrUnknownDictionary + } else { + frame.history.setDict(&dict) + } + } if err != nil { stream.output <- decodeOutput{ err: err, diff --git a/redshiftsink/vendor/github.com/klauspost/compress/zstd/decoder_options.go b/redshiftsink/vendor/github.com/klauspost/compress/zstd/decoder_options.go index 2ac9cd2dd..284d38449 100644 --- a/redshiftsink/vendor/github.com/klauspost/compress/zstd/decoder_options.go +++ b/redshiftsink/vendor/github.com/klauspost/compress/zstd/decoder_options.go @@ -18,6 +18,7 @@ type decoderOptions struct { lowMem bool concurrent int maxDecodedSize uint64 + dicts []dict } func (o *decoderOptions) setDefault() { @@ -66,3 +67,18 @@ func WithDecoderMaxMemory(n uint64) DOption { return nil } } + +// WithDecoderDicts allows registering one or more dictionaries for the decoder. +// If several dictionaries with the same ID are provided, the last one will be used. +func WithDecoderDicts(dicts ...[]byte) DOption { + return func(o *decoderOptions) error { + for _, b := range dicts { + d, err := loadDict(b) + if err != nil { + return err + } + o.dicts = append(o.dicts, *d) + } + return nil + } +} diff --git a/redshiftsink/vendor/github.com/klauspost/compress/zstd/dict.go b/redshiftsink/vendor/github.com/klauspost/compress/zstd/dict.go new file mode 100644 index 000000000..8eb6f6ba3 --- /dev/null +++ b/redshiftsink/vendor/github.com/klauspost/compress/zstd/dict.go @@ -0,0 +1,104 @@ +package zstd + +import ( + "bytes" + "encoding/binary" + "errors" + "fmt" + "io" + + "github.com/klauspost/compress/huff0" +) + +type dict struct { + id uint32 + + litDec *huff0.Scratch + llDec, ofDec, mlDec sequenceDec + offsets [3]int + content []byte +} + +var dictMagic = [4]byte{0x37, 0xa4, 0x30, 0xec} + +// Load a dictionary as described in +// https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format +func loadDict(b []byte) (*dict, error) { + // Check static field size.
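`WithDecoderDicts` above registers dictionaries keyed by the ID embedded in their header; frames whose `Dictionary_ID` matches decode transparently, while an unregistered ID surfaces as `ErrUnknownDictionary`. A usage sketch, assuming `dictionary` holds a valid zstd dictionary (e.g. trained with `zstd --train`):

```go
package main

import (
	"fmt"

	"github.com/klauspost/compress/zstd"
)

// decodeWithDict decodes frames that may reference a pre-shared dictionary.
func decodeWithDict(compressed, dictionary []byte) ([]byte, error) {
	dec, err := zstd.NewReader(nil, zstd.WithDecoderDicts(dictionary))
	if err != nil {
		return nil, err // malformed dictionary, reported at construction
	}
	defer dec.Close()
	// Frames without a dictionary ID decode as before; frames whose ID
	// matches the registered dictionary pick it up automatically.
	return dec.DecodeAll(compressed, nil)
}

func main() {
	_, err := decodeWithDict(nil, nil)
	fmt.Println(err) // a nil dictionary fails to load: unexpected EOF
}
```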
+ if len(b) <= 8+(3*4) { + return nil, io.ErrUnexpectedEOF + } + d := dict{ + llDec: sequenceDec{fse: &fseDecoder{}}, + ofDec: sequenceDec{fse: &fseDecoder{}}, + mlDec: sequenceDec{fse: &fseDecoder{}}, + } + if !bytes.Equal(b[:4], dictMagic[:]) { + return nil, ErrMagicMismatch + } + d.id = binary.LittleEndian.Uint32(b[4:8]) + if d.id == 0 { + return nil, errors.New("dictionaries cannot have ID 0") + } + + // Read literal table + var err error + d.litDec, b, err = huff0.ReadTable(b[8:], nil) + if err != nil { + return nil, err + } + + br := byteReader{ + b: b, + off: 0, + } + readDec := func(i tableIndex, dec *fseDecoder) error { + if err := dec.readNCount(&br, uint16(maxTableSymbol[i])); err != nil { + return err + } + if br.overread() { + return io.ErrUnexpectedEOF + } + err = dec.transform(symbolTableX[i]) + if err != nil { + println("Transform table error:", err) + return err + } + if debug { + println("Read table ok", "symbolLen:", dec.symbolLen) + } + // Set decoders as predefined so they aren't reused. + dec.preDefined = true + return nil + } + + if err := readDec(tableOffsets, d.ofDec.fse); err != nil { + return nil, err + } + if err := readDec(tableMatchLengths, d.mlDec.fse); err != nil { + return nil, err + } + if err := readDec(tableLiteralLengths, d.llDec.fse); err != nil { + return nil, err + } + if br.remain() < 12 { + return nil, io.ErrUnexpectedEOF + } + + d.offsets[0] = int(br.Uint32()) + br.advance(4) + d.offsets[1] = int(br.Uint32()) + br.advance(4) + d.offsets[2] = int(br.Uint32()) + br.advance(4) + if d.offsets[0] <= 0 || d.offsets[1] <= 0 || d.offsets[2] <= 0 { + return nil, errors.New("invalid offset in dictionary") + } + d.content = make([]byte, br.remain()) + copy(d.content, br.unread()) + if d.offsets[0] > len(d.content) || d.offsets[1] > len(d.content) || d.offsets[2] > len(d.content) { + return nil, fmt.Errorf("initial offset bigger than dictionary content size %d, offsets: %v", len(d.content), d.offsets) + } + + return &d, nil +} diff --git a/redshiftsink/vendor/github.com/klauspost/compress/zstd/enc_better.go b/redshiftsink/vendor/github.com/klauspost/compress/zstd/enc_better.go new file mode 100644 index 000000000..c120d9054 --- /dev/null +++ b/redshiftsink/vendor/github.com/klauspost/compress/zstd/enc_better.go @@ -0,0 +1,518 @@ +// Copyright 2019+ Klaus Post. All rights reserved. +// License information can be found in the LICENSE file. +// Based on work by Yann Collet, released under BSD License. + +package zstd + +import "fmt" + +const ( + betterLongTableBits = 19 // Bits used in the long match table + betterLongTableSize = 1 << betterLongTableBits // Size of the table + + // Note: Increasing the short table bits or making the hash shorter + // can actually lead to compression degradation since it will 'steal' more from the + // long match table and match offsets are quite big. + // This greatly depends on the type of input. + betterShortTableBits = 13 // Bits used in the short match table + betterShortTableSize = 1 << betterShortTableBits // Size of the table +) + +type prevEntry struct { + offset int32 + prev int32 +} + +// betterFastEncoder uses 2 tables, one for short matches (5 bytes) and one for long matches. +// The long match table contains the previous entry with the same hash, +// effectively making it a "chain" of length 2. +// When we find a long match we choose between the two values and select the longest. +// When we find a short match, after checking the long, we check if we can find a long at n+1 +// and that it is longer (lazy matching). 
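`loadDict` above follows the documented layout: the 4-byte magic `0x37a430ec`, a little-endian 32-bit ID (zero is reserved), the huff0 literal table, three FSE tables, the three repeat offsets, and finally the raw content. A sketch that validates just the fixed 8-byte header (`dictID` is a hypothetical helper, not part of the package):

```go
package main

import (
	"encoding/binary"
	"errors"
	"fmt"
)

// dictID extracts the dictionary ID from a raw zstd dictionary after
// checking the same fixed header fields loadDict validates first.
func dictID(b []byte) (uint32, error) {
	if len(b) < 8 {
		return 0, errors.New("dictionary too short")
	}
	// {0x37, 0xa4, 0x30, 0xec} read as a little-endian uint32.
	if binary.LittleEndian.Uint32(b[:4]) != 0xec30a437 {
		return 0, errors.New("bad dictionary magic")
	}
	id := binary.LittleEndian.Uint32(b[4:8])
	if id == 0 {
		return 0, errors.New("dictionaries cannot have ID 0")
	}
	return id, nil
}

func main() {
	raw := []byte{0x37, 0xa4, 0x30, 0xec, 0x2a, 0x00, 0x00, 0x00}
	fmt.Println(dictID(raw)) // 42 <nil>
}
```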
+type betterFastEncoder struct { + fastBase + table [betterShortTableSize]tableEntry + longTable [betterLongTableSize]prevEntry +} + +// Encode improves compression... +func (e *betterFastEncoder) Encode(blk *blockEnc, src []byte) { + const ( + // Input margin is the number of bytes we read (8) + // and the maximum we will read ahead (2) + inputMargin = 8 + 2 + minNonLiteralBlockSize = 16 + ) + + // Protect against e.cur wraparound. + for e.cur >= bufferReset { + if len(e.hist) == 0 { + for i := range e.table[:] { + e.table[i] = tableEntry{} + } + for i := range e.longTable[:] { + e.longTable[i] = prevEntry{} + } + e.cur = e.maxMatchOff + break + } + // Shift down everything in the table that isn't already too far away. + minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff + for i := range e.table[:] { + v := e.table[i].offset + if v < minOff { + v = 0 + } else { + v = v - e.cur + e.maxMatchOff + } + e.table[i].offset = v + } + for i := range e.longTable[:] { + v := e.longTable[i].offset + v2 := e.longTable[i].prev + if v < minOff { + v = 0 + v2 = 0 + } else { + v = v - e.cur + e.maxMatchOff + if v2 < minOff { + v2 = 0 + } else { + v2 = v2 - e.cur + e.maxMatchOff + } + } + e.longTable[i] = prevEntry{ + offset: v, + prev: v2, + } + } + e.cur = e.maxMatchOff + break + } + + s := e.addBlock(src) + blk.size = len(src) + if len(src) < minNonLiteralBlockSize { + blk.extraLits = len(src) + blk.literals = blk.literals[:len(src)] + copy(blk.literals, src) + return + } + + // Override src + src = e.hist + sLimit := int32(len(src)) - inputMargin + // stepSize is the number of bytes to skip on every main loop iteration. + // It should be >= 1. + const stepSize = 1 + + const kSearchStrength = 9 + + // nextEmit is where in src the next emitLiteral should start from. + nextEmit := s + cv := load6432(src, s) + + // Relative offsets + offset1 := int32(blk.recentOffsets[0]) + offset2 := int32(blk.recentOffsets[1]) + + addLiterals := func(s *seq, until int32) { + if until == nextEmit { + return + } + blk.literals = append(blk.literals, src[nextEmit:until]...) + s.litLen = uint32(until - nextEmit) + } + if debug { + println("recent offsets:", blk.recentOffsets) + } + +encodeLoop: + for { + var t int32 + // We allow the encoder to optionally turn off repeat offsets across blocks + canRepeat := len(blk.sequences) > 2 + var matched int32 + + for { + if debugAsserts && canRepeat && offset1 == 0 { + panic("offset0 was 0") + } + + nextHashS := hash5(cv, betterShortTableBits) + nextHashL := hash8(cv, betterLongTableBits) + candidateL := e.longTable[nextHashL] + candidateS := e.table[nextHashS] + + const repOff = 1 + repIndex := s - offset1 + repOff + off := s + e.cur + e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset} + e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)} + + if canRepeat { + if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) { + // Consider history as well. + var seq seq + lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src) + + seq.matchLen = uint32(lenght - zstdMinMatch) + + // We might be able to match backwards. + // Extend as long as we can. + start := s + repOff + // We end the search early, so we don't risk 0 literals + // and have to do special offset treatment. 
+ startLimit := nextEmit + 1 + + tMin := s - e.maxMatchOff + if tMin < 0 { + tMin = 0 + } + for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 { + repIndex-- + start-- + seq.matchLen++ + } + addLiterals(&seq, start) + + // rep 0 + seq.offset = 1 + if debugSequences { + println("repeat sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + + // Index match start+1 (long) -> s - 1 + index0 := s + repOff + s += lenght + repOff + + nextEmit = s + if s >= sLimit { + if debug { + println("repeat ended", s, lenght) + + } + break encodeLoop + } + // Index skipped... + for index0 < s-1 { + cv0 := load6432(src, index0) + cv1 := cv0 >> 8 + h0 := hash8(cv0, betterLongTableBits) + off := index0 + e.cur + e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} + e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)} + index0 += 2 + } + cv = load6432(src, s) + continue + } + const repOff2 = 1 + + // We deviate from the reference encoder and also check offset 2. + // Still slower and not much better, so disabled. + // repIndex = s - offset2 + repOff2 + if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) { + // Consider history as well. + var seq seq + lenght := 8 + e.matchlen(s+8+repOff2, repIndex+8, src) + + seq.matchLen = uint32(lenght - zstdMinMatch) + + // We might be able to match backwards. + // Extend as long as we can. + start := s + repOff2 + // We end the search early, so we don't risk 0 literals + // and have to do special offset treatment. + startLimit := nextEmit + 1 + + tMin := s - e.maxMatchOff + if tMin < 0 { + tMin = 0 + } + for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 { + repIndex-- + start-- + seq.matchLen++ + } + addLiterals(&seq, start) + + // rep 2 + seq.offset = 2 + if debugSequences { + println("repeat sequence 2", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + + index0 := s + repOff2 + s += lenght + repOff2 + nextEmit = s + if s >= sLimit { + if debug { + println("repeat ended", s, lenght) + + } + break encodeLoop + } + + // Index skipped... + for index0 < s-1 { + cv0 := load6432(src, index0) + cv1 := cv0 >> 8 + h0 := hash8(cv0, betterLongTableBits) + off := index0 + e.cur + e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} + e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)} + index0 += 2 + } + cv = load6432(src, s) + // Swap offsets + offset1, offset2 = offset2, offset1 + continue + } + } + // Find the offsets of our two matches. + coffsetL := candidateL.offset - e.cur + coffsetLP := candidateL.prev - e.cur + + // Check if we have a long match. + if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) { + // Found a long match, at least 8 bytes. + matched = e.matchlen(s+8, coffsetL+8, src) + 8 + t = coffsetL + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + if debugAsserts && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + if debugMatches { + println("long match") + } + + if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) { + // Found a long match, at least 8 bytes. 
+ prevMatch := e.matchlen(s+8, coffsetLP+8, src) + 8 + if prevMatch > matched { + matched = prevMatch + t = coffsetLP + } + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + if debugAsserts && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + if debugMatches { + println("long match") + } + } + break + } + + // Check if we have a long match on prev. + if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) { + // Found a long match, at least 8 bytes. + matched = e.matchlen(s+8, coffsetLP+8, src) + 8 + t = coffsetLP + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + if debugAsserts && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + if debugMatches { + println("long match") + } + break + } + + coffsetS := candidateS.offset - e.cur + + // Check if we have a short match. + if s-coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val { + // found a regular match + matched = e.matchlen(s+4, coffsetS+4, src) + 4 + + // See if we can find a long match at s+1 + const checkAt = 1 + cv := load6432(src, s+checkAt) + nextHashL = hash8(cv, betterLongTableBits) + candidateL = e.longTable[nextHashL] + coffsetL = candidateL.offset - e.cur + + // We can store it, since we have at least a 4 byte match. + e.longTable[nextHashL] = prevEntry{offset: s + checkAt + e.cur, prev: candidateL.offset} + if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) { + // Found a long match, at least 8 bytes. + matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8 + if matchedNext > matched { + t = coffsetL + s += checkAt + matched = matchedNext + if debugMatches { + println("long match (after short)") + } + break + } + } + + // Check prev long... + coffsetL = candidateL.prev - e.cur + if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) { + // Found a long match, at least 8 bytes. + matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8 + if matchedNext > matched { + t = coffsetL + s += checkAt + matched = matchedNext + if debugMatches { + println("prev long match (after short)") + } + break + } + } + t = coffsetS + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + if debugAsserts && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + if debugAsserts && t < 0 { + panic("t<0") + } + if debugMatches { + println("short match") + } + break + } + + // No match found, move forward in input. + s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1)) + if s >= sLimit { + break encodeLoop + } + cv = load6432(src, s) + } + + // A 4-byte match has been found. Update recent offsets. + // We'll later see if more than 4 bytes. + offset2 = offset1 + offset1 = s - t + + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + + if debugAsserts && canRepeat && int(offset1) > len(src) { + panic("invalid offset") + } + + // Extend the n-byte match as long as possible. + l := matched + + // Extend backwards + tMin := s - e.maxMatchOff + if tMin < 0 { + tMin = 0 + } + for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength { + s-- + t-- + l++ + } + + // Write our sequence + var seq seq + seq.litLen = uint32(s - nextEmit) + seq.matchLen = uint32(l - zstdMinMatch) + if seq.litLen > 0 { + blk.literals = append(blk.literals, src[nextEmit:s]...) 
+ } + seq.offset = uint32(s-t) + 3 + s += l + if debugSequences { + println("sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + nextEmit = s + if s >= sLimit { + break encodeLoop + } + + // Index match start+1 (long) -> s - 1 + index0 := s - l + 1 + for index0 < s-1 { + cv0 := load6432(src, index0) + cv1 := cv0 >> 8 + h0 := hash8(cv0, betterLongTableBits) + off := index0 + e.cur + e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} + e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)} + index0 += 2 + } + + cv = load6432(src, s) + if !canRepeat { + continue + } + + // Check offset 2 + for { + o2 := s - offset2 + if load3232(src, o2) != uint32(cv) { + // Do regular search + break + } + + // Store this, since we have it. + nextHashS := hash5(cv, betterShortTableBits) + nextHashL := hash8(cv, betterLongTableBits) + + // We have at least 4 byte match. + // No need to check backwards. We come straight from a match + l := 4 + e.matchlen(s+4, o2+4, src) + + e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset} + e.table[nextHashS] = tableEntry{offset: s + e.cur, val: uint32(cv)} + seq.matchLen = uint32(l) - zstdMinMatch + seq.litLen = 0 + + // Since litlen is always 0, this is offset 1. + seq.offset = 1 + s += l + nextEmit = s + if debugSequences { + println("sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + + // Swap offset 1 and 2. + offset1, offset2 = offset2, offset1 + if s >= sLimit { + // Finished + break encodeLoop + } + cv = load6432(src, s) + } + } + + if int(nextEmit) < len(src) { + blk.literals = append(blk.literals, src[nextEmit:]...) + blk.extraLits = len(src) - int(nextEmit) + } + blk.recentOffsets[0] = uint32(offset1) + blk.recentOffsets[1] = uint32(offset2) + if debug { + println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits) + } +} + +// EncodeNoHist will encode a block with no history and no following blocks. +// Most notable difference is that src will not be copied for history and +// we do not need to check for max match length. +func (e *betterFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) { + e.Encode(blk, src) +} diff --git a/redshiftsink/vendor/github.com/klauspost/compress/zstd/enc_dfast.go b/redshiftsink/vendor/github.com/klauspost/compress/zstd/enc_dfast.go index ee3b09b02..50276bcde 100644 --- a/redshiftsink/vendor/github.com/klauspost/compress/zstd/enc_dfast.go +++ b/redshiftsink/vendor/github.com/klauspost/compress/zstd/enc_dfast.go @@ -4,6 +4,8 @@ package zstd +import "fmt" + const ( dFastLongTableBits = 17 // Bits used in the long match table dFastLongTableSize = 1 << dFastLongTableBits // Size of the table @@ -29,7 +31,7 @@ func (e *doubleFastEncoder) Encode(blk *blockEnc, src []byte) { ) // Protect against e.cur wraparound. - for e.cur > (1<<30)+e.maxMatchOff { + for e.cur >= bufferReset { if len(e.hist) == 0 { for i := range e.table[:] { e.table[i] = tableEntry{} @@ -61,6 +63,7 @@ func (e *doubleFastEncoder) Encode(blk *blockEnc, src []byte) { e.longTable[i].offset = v } e.cur = e.maxMatchOff + break } s := e.addBlock(src) @@ -77,10 +80,7 @@ func (e *doubleFastEncoder) Encode(blk *blockEnc, src []byte) { sLimit := int32(len(src)) - inputMargin // stepSize is the number of bytes to skip on every main loop iteration. // It should be >= 1. 
- stepSize := int32(e.o.targetLength) - if stepSize == 0 { - stepSize++ - } + const stepSize = 1 const kSearchStrength = 8 @@ -110,7 +110,7 @@ encodeLoop: canRepeat := len(blk.sequences) > 2 for { - if debug && canRepeat && offset1 == 0 { + if debugAsserts && canRepeat && offset1 == 0 { panic("offset0 was 0") } @@ -169,55 +169,6 @@ encodeLoop: cv = load6432(src, s) continue } - const repOff2 = 1 - // We deviate from the reference encoder and also check offset 2. - // Slower and not consistently better, so disabled. - // repIndex = s - offset2 + repOff2 - if false && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff2*8)) { - // Consider history as well. - var seq seq - lenght := 4 + e.matchlen(s+4+repOff2, repIndex+4, src) - - seq.matchLen = uint32(lenght - zstdMinMatch) - - // We might be able to match backwards. - // Extend as long as we can. - start := s + repOff2 - // We end the search early, so we don't risk 0 literals - // and have to do special offset treatment. - startLimit := nextEmit + 1 - - tMin := s - e.maxMatchOff - if tMin < 0 { - tMin = 0 - } - for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 { - repIndex-- - start-- - seq.matchLen++ - } - addLiterals(&seq, start) - - // rep 2 - seq.offset = 2 - if debugSequences { - println("repeat sequence 2", seq, "next s:", s) - } - blk.sequences = append(blk.sequences, seq) - s += lenght + repOff2 - nextEmit = s - if s >= sLimit { - if debug { - println("repeat ended", s, lenght) - - } - break encodeLoop - } - cv = load6432(src, s) - // Swap offsets - offset1, offset2 = offset2, offset1 - continue - } } // Find the offsets of our two matches. coffsetL := s - (candidateL.offset - e.cur) @@ -229,10 +180,10 @@ encodeLoop: // Reference encoder checks all 8 bytes, we only check 4, // but the likelihood of both the first 4 bytes and the hash matching should be enough. t = candidateL.offset - e.cur - if debug && s <= t { - panic("s <= t") + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) } - if debug && s-t > e.maxMatchOff { + if debugAsserts && s-t > e.maxMatchOff { panic("s - t >e.maxMatchOff") } if debugMatches { @@ -266,13 +217,13 @@ encodeLoop: } t = candidateS.offset - e.cur - if debug && s <= t { - panic("s <= t") + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) } - if debug && s-t > e.maxMatchOff { + if debugAsserts && s-t > e.maxMatchOff { panic("s - t >e.maxMatchOff") } - if debug && t < 0 { + if debugAsserts && t < 0 { panic("t<0") } if debugMatches { @@ -294,11 +245,11 @@ encodeLoop: offset2 = offset1 offset1 = s - t - if debug && s <= t { - panic("s <= t") + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) } - if debug && canRepeat && int(offset1) > len(src) { + if debugAsserts && canRepeat && int(offset1) > len(src) { panic("invalid offset") } @@ -369,7 +320,7 @@ encodeLoop: } // Store this, since we have it. - nextHashS := hash5(cv1>>8, dFastShortTableBits) + nextHashS := hash5(cv, dFastShortTableBits) nextHashL := hash8(cv, dFastLongTableBits) // We have at least 4 byte match. @@ -424,7 +375,7 @@ func (e *doubleFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) { ) // Protect against e.cur wraparound. 
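The wraparound protection these encoders share rebases every stored table offset once `e.cur` reaches `bufferReset`: entries that fell out of the match window are zeroed (so they can never match), the rest are shifted into the new coordinate space. The rule for a single offset, as a sketch with illustrative parameters:

```go
package main

import "fmt"

// rebase maps an absolute table offset into the new coordinate space
// after the history base moves, mirroring the shift-down loops above.
func rebase(v, cur, histLen, maxMatchOff int32) int32 {
	minOff := cur + histLen - maxMatchOff
	if v < minOff {
		return 0 // too old: outside the window, drop it
	}
	return v - cur + maxMatchOff
}

func main() {
	const cur, histLen, maxMatchOff = 1 << 30, 4096, 1 << 16
	fmt.Println(rebase(cur-1<<20, cur, histLen, maxMatchOff))      // 0: expired
	fmt.Println(rebase(cur+histLen-10, cur, histLen, maxMatchOff)) // still reachable
}
```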
- if e.cur > (1<<30)+e.maxMatchOff { + if e.cur >= bufferReset { for i := range e.table[:] { e.table[i] = tableEntry{} } @@ -447,10 +398,7 @@ func (e *doubleFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) { sLimit := int32(len(src)) - inputMargin // stepSize is the number of bytes to skip on every main loop iteration. // It should be >= 1. - stepSize := int32(e.o.targetLength) - if stepSize == 0 { - stepSize++ - } + const stepSize = 1 const kSearchStrength = 8 @@ -545,10 +493,10 @@ encodeLoop: // Reference encoder checks all 8 bytes, we only check 4, // but the likelihood of both the first 4 bytes and the hash matching should be enough. t = candidateL.offset - e.cur - if debug && s <= t { - panic("s <= t") + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) } - if debug && s-t > e.maxMatchOff { + if debugAsserts && s-t > e.maxMatchOff { panic("s - t >e.maxMatchOff") } if debugMatches { @@ -582,13 +530,13 @@ encodeLoop: } t = candidateS.offset - e.cur - if debug && s <= t { - panic("s <= t") + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) } - if debug && s-t > e.maxMatchOff { + if debugAsserts && s-t > e.maxMatchOff { panic("s - t >e.maxMatchOff") } - if debug && t < 0 { + if debugAsserts && t < 0 { panic("t<0") } if debugMatches { @@ -610,8 +558,8 @@ encodeLoop: offset2 = offset1 offset1 = s - t - if debug && s <= t { - panic("s <= t") + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) } // Extend the 4-byte match as long as possible. @@ -723,4 +671,8 @@ encodeLoop: println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits) } + // We do not store history, so we must offset e.cur to avoid false matches for next user. + if e.cur < bufferReset { + e.cur += int32(len(src)) + } } diff --git a/redshiftsink/vendor/github.com/klauspost/compress/zstd/enc_fast.go b/redshiftsink/vendor/github.com/klauspost/compress/zstd/enc_fast.go index 0bdddac5b..4104b456c 100644 --- a/redshiftsink/vendor/github.com/klauspost/compress/zstd/enc_fast.go +++ b/redshiftsink/vendor/github.com/klauspost/compress/zstd/enc_fast.go @@ -5,6 +5,8 @@ package zstd import ( + "fmt" + "math" "math/bits" "github.com/klauspost/compress/zstd/internal/xxhash" @@ -22,26 +24,29 @@ type tableEntry struct { offset int32 } -type fastEncoder struct { - o encParams +type fastBase struct { // cur is the offset at the start of hist cur int32 // maximum offset. Should be at least 2x block size. maxMatchOff int32 hist []byte crc *xxhash.Digest - table [tableSize]tableEntry tmp [8]byte blk *blockEnc } +type fastEncoder struct { + fastBase + table [tableSize]tableEntry +} + // CRC returns the underlying CRC writer. -func (e *fastEncoder) CRC() *xxhash.Digest { +func (e *fastBase) CRC() *xxhash.Digest { return e.crc } // AppendCRC will append the CRC to the destination slice and return it. -func (e *fastEncoder) AppendCRC(dst []byte) []byte { +func (e *fastBase) AppendCRC(dst []byte) []byte { crc := e.crc.Sum(e.tmp[:0]) dst = append(dst, crc[7], crc[6], crc[5], crc[4]) return dst @@ -49,7 +54,7 @@ func (e *fastEncoder) AppendCRC(dst []byte) []byte { // WindowSize returns the window size of the encoder, // or a window size small enough to contain the input size, if > 0. -func (e *fastEncoder) WindowSize(size int) int32 { +func (e *fastBase) WindowSize(size int) int32 { if size > 0 && size < int(e.maxMatchOff) { b := int32(1) << uint(bits.Len(uint(size))) // Keep minimum window. 
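`WindowSize` above shrinks the effective window for small one-shot inputs: `bits.Len` rounds the size up to a power of two that covers it, floored at the minimum window, so tiny encodes do not allocate the full default window. The rounding step on its own:

```go
package main

import (
	"fmt"
	"math/bits"
)

// windowFor returns a power of two large enough to hold size, with a
// floor of minWindow — the same shape fastBase.WindowSize uses when the
// input size is known and smaller than the configured window.
func windowFor(size, minWindow int) int {
	w := 1 << uint(bits.Len(uint(size)))
	if w < minWindow {
		w = minWindow
	}
	return w
}

func main() {
	fmt.Println(windowFor(1000, 1024))  // 1024
	fmt.Println(windowFor(70000, 1024)) // 131072
}
```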
@@ -62,7 +67,7 @@ func (e *fastEncoder) WindowSize(size int) int32 { } // Block returns the current block. -func (e *fastEncoder) Block() *blockEnc { +func (e *fastBase) Block() *blockEnc { return e.blk } @@ -74,7 +79,7 @@ func (e *fastEncoder) Encode(blk *blockEnc, src []byte) { ) // Protect against e.cur wraparound. - for e.cur > (1<<30)+e.maxMatchOff { + for e.cur >= bufferReset { if len(e.hist) == 0 { for i := range e.table[:] { e.table[i] = tableEntry{} @@ -94,6 +99,7 @@ func (e *fastEncoder) Encode(blk *blockEnc, src []byte) { e.table[i].offset = v } e.cur = e.maxMatchOff + break } s := e.addBlock(src) @@ -110,11 +116,7 @@ func (e *fastEncoder) Encode(blk *blockEnc, src []byte) { sLimit := int32(len(src)) - inputMargin // stepSize is the number of bytes to skip on every main loop iteration. // It should be >= 2. - stepSize := int32(e.o.targetLength) - if stepSize == 0 { - stepSize++ - } - stepSize++ + const stepSize = 2 // TEMPLATE const hashLog = tableBits @@ -151,7 +153,7 @@ encodeLoop: canRepeat := len(blk.sequences) > 2 for { - if debug && canRepeat && offset1 == 0 { + if debugAsserts && canRepeat && offset1 == 0 { panic("offset0 was 0") } @@ -167,9 +169,22 @@ encodeLoop: if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) { // Consider history as well. var seq seq - lenght := 4 + e.matchlen(s+6, repIndex+4, src) + var length int32 + // length = 4 + e.matchlen(s+6, repIndex+4, src) + { + a := src[s+6:] + b := src[repIndex+4:] + endI := len(a) & (math.MaxInt32 - 7) + length = int32(endI) + 4 + for i := 0; i < endI; i += 8 { + if diff := load64(a, i) ^ load64(b, i); diff != 0 { + length = int32(i+bits.TrailingZeros64(diff)>>3) + 4 + break + } + } + } - seq.matchLen = uint32(lenght - zstdMinMatch) + seq.matchLen = uint32(length - zstdMinMatch) // We might be able to match backwards. // Extend as long as we can. @@ -195,11 +210,11 @@ encodeLoop: println("repeat sequence", seq, "next s:", s) } blk.sequences = append(blk.sequences, seq) - s += lenght + 2 + s += length + 2 nextEmit = s if s >= sLimit { if debug { - println("repeat ended", s, lenght) + println("repeat ended", s, length) } break encodeLoop @@ -212,10 +227,10 @@ encodeLoop: if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val { // found a regular match t = candidate.offset - e.cur - if debug && s <= t { - panic("s <= t") + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) } - if debug && s-t > e.maxMatchOff { + if debugAsserts && s-t > e.maxMatchOff { panic("s - t >e.maxMatchOff") } break @@ -225,13 +240,13 @@ encodeLoop: // found a regular match t = candidate2.offset - e.cur s++ - if debug && s <= t { - panic("s <= t") + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) } - if debug && s-t > e.maxMatchOff { + if debugAsserts && s-t > e.maxMatchOff { panic("s - t >e.maxMatchOff") } - if debug && t < 0 { + if debugAsserts && t < 0 { panic("t<0") } break @@ -246,16 +261,29 @@ encodeLoop: offset2 = offset1 offset1 = s - t - if debug && s <= t { - panic("s <= t") + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) } - if debug && canRepeat && int(offset1) > len(src) { + if debugAsserts && canRepeat && int(offset1) > len(src) { panic("invalid offset") } // Extend the 4-byte match as long as possible. 
- l := e.matchlen(s+4, t+4, src) + 4 + //l := e.matchlen(s+4, t+4, src) + 4 + var l int32 + { + a := src[s+4:] + b := src[t+4:] + endI := len(a) & (math.MaxInt32 - 7) + l = int32(endI) + 4 + for i := 0; i < endI; i += 8 { + if diff := load64(a, i) ^ load64(b, i); diff != 0 { + l = int32(i+bits.TrailingZeros64(diff)>>3) + 4 + break + } + } + } // Extend backwards tMin := s - e.maxMatchOff @@ -292,7 +320,20 @@ encodeLoop: if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) { // We have at least 4 byte match. // No need to check backwards. We come straight from a match - l := 4 + e.matchlen(s+4, o2+4, src) + //l := 4 + e.matchlen(s+4, o2+4, src) + var l int32 + { + a := src[s+4:] + b := src[o2+4:] + endI := len(a) & (math.MaxInt32 - 7) + l = int32(endI) + 4 + for i := 0; i < endI; i += 8 { + if diff := load64(a, i) ^ load64(b, i); diff != 0 { + l = int32(i+bits.TrailingZeros64(diff)>>3) + 4 + break + } + } + } // Store this, since we have it. nextHash := hash6(cv, hashLog) @@ -342,8 +383,9 @@ func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) { panic("src too big") } } + // Protect against e.cur wraparound. - if e.cur > (1<<30)+e.maxMatchOff { + if e.cur >= bufferReset { for i := range e.table[:] { e.table[i] = tableEntry{} } @@ -410,10 +452,23 @@ encodeLoop: if len(blk.sequences) > 2 && load3232(src, repIndex) == uint32(cv>>16) { // Consider history as well. var seq seq - // lenght := 4 + e.matchlen(s+6, repIndex+4, src) - lenght := 4 + int32(matchLen(src[s+6:], src[repIndex+4:])) + // length := 4 + e.matchlen(s+6, repIndex+4, src) + // length := 4 + int32(matchLen(src[s+6:], src[repIndex+4:])) + var length int32 + { + a := src[s+6:] + b := src[repIndex+4:] + endI := len(a) & (math.MaxInt32 - 7) + length = int32(endI) + 4 + for i := 0; i < endI; i += 8 { + if diff := load64(a, i) ^ load64(b, i); diff != 0 { + length = int32(i+bits.TrailingZeros64(diff)>>3) + 4 + break + } + } + } - seq.matchLen = uint32(lenght - zstdMinMatch) + seq.matchLen = uint32(length - zstdMinMatch) // We might be able to match backwards. // Extend as long as we can. 
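The repeated inline blocks above are a manually inlined match-length loop: XOR eight bytes at a time and, at the first nonzero word, convert `bits.TrailingZeros64` into a byte count. The same technique as a standalone function:

```go
package main

import (
	"encoding/binary"
	"fmt"
	"math/bits"
)

// matchLen counts equal leading bytes of a and b, eight at a time.
// A nonzero XOR word pinpoints the first differing byte: the number of
// trailing zero bits divided by 8 (loads are little-endian).
func matchLen(a, b []byte) int {
	n := len(a)
	if len(b) < n {
		n = len(b)
	}
	var i int
	for ; i+8 <= n; i += 8 {
		x := binary.LittleEndian.Uint64(a[i:]) ^ binary.LittleEndian.Uint64(b[i:])
		if x != 0 {
			return i + bits.TrailingZeros64(x)>>3
		}
	}
	for ; i < n && a[i] == b[i]; i++ {
	}
	return i
}

func main() {
	fmt.Println(matchLen([]byte("zstandardzz"), []byte("zstandardAA"))) // 9
}
```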
@@ -439,11 +494,11 @@ encodeLoop: println("repeat sequence", seq, "next s:", s) } blk.sequences = append(blk.sequences, seq) - s += lenght + 2 + s += length + 2 nextEmit = s if s >= sLimit { if debug { - println("repeat ended", s, lenght) + println("repeat ended", s, length) } break encodeLoop @@ -456,12 +511,15 @@ encodeLoop: if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val { // found a regular match t = candidate.offset - e.cur - if debug && s <= t { - panic("s <= t") + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) } - if debug && s-t > e.maxMatchOff { + if debugAsserts && s-t > e.maxMatchOff { panic("s - t >e.maxMatchOff") } + if debugAsserts && t < 0 { + panic(fmt.Sprintf("t (%d) < 0, candidate.offset: %d, e.cur: %d, coffset0: %d, e.maxMatchOff: %d", t, candidate.offset, e.cur, coffset0, e.maxMatchOff)) + } break } @@ -469,13 +527,13 @@ encodeLoop: // found a regular match t = candidate2.offset - e.cur s++ - if debug && s <= t { - panic("s <= t") + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) } - if debug && s-t > e.maxMatchOff { + if debugAsserts && s-t > e.maxMatchOff { panic("s - t >e.maxMatchOff") } - if debug && t < 0 { + if debugAsserts && t < 0 { panic("t<0") } break @@ -490,13 +548,29 @@ encodeLoop: offset2 = offset1 offset1 = s - t - if debug && s <= t { - panic("s <= t") + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) } + if debugAsserts && t < 0 { + panic(fmt.Sprintf("t (%d) < 0 ", t)) + } // Extend the 4-byte match as long as possible. //l := e.matchlenNoHist(s+4, t+4, src) + 4 - l := int32(matchLen(src[s+4:], src[t+4:])) + 4 + // l := int32(matchLen(src[s+4:], src[t+4:])) + 4 + var l int32 + { + a := src[s+4:] + b := src[t+4:] + endI := len(a) & (math.MaxInt32 - 7) + l = int32(endI) + 4 + for i := 0; i < endI; i += 8 { + if diff := load64(a, i) ^ load64(b, i); diff != 0 { + l = int32(i+bits.TrailingZeros64(diff)>>3) + 4 + break + } + } + } // Extend backwards tMin := s - e.maxMatchOff @@ -534,7 +608,20 @@ encodeLoop: // We have at least 4 byte match. // No need to check backwards. We come straight from a match //l := 4 + e.matchlenNoHist(s+4, o2+4, src) - l := 4 + int32(matchLen(src[s+4:], src[o2+4:])) + // l := 4 + int32(matchLen(src[s+4:], src[o2+4:])) + var l int32 + { + a := src[s+4:] + b := src[o2+4:] + endI := len(a) & (math.MaxInt32 - 7) + l = int32(endI) + 4 + for i := 0; i < endI; i += 8 { + if diff := load64(a, i) ^ load64(b, i); diff != 0 { + l = int32(i+bits.TrailingZeros64(diff)>>3) + 4 + break + } + } + } // Store this, since we have it. nextHash := hash6(cv, hashLog) @@ -567,9 +654,16 @@ encodeLoop: if debug { println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits) } + // We do not store history, so we must offset e.cur to avoid false matches for next user. + if e.cur < bufferReset { + e.cur += int32(len(src)) + } } -func (e *fastEncoder) addBlock(src []byte) int32 { +func (e *fastBase) addBlock(src []byte) int32 { + if debugAsserts && e.cur > bufferReset { + panic(fmt.Sprintf("ecur (%d) > buffer reset (%d)", e.cur, bufferReset)) + } // check if we have space already if len(e.hist)+len(src) > cap(e.hist) { if cap(e.hist) == 0 { @@ -597,39 +691,41 @@ func (e *fastEncoder) addBlock(src []byte) int32 { // useBlock will replace the block with the provided one, // but transfer recent offsets from the previous. 
-func (e *fastEncoder) UseBlock(enc *blockEnc) { +func (e *fastBase) UseBlock(enc *blockEnc) { enc.reset(e.blk) e.blk = enc } -func (e *fastEncoder) matchlenNoHist(s, t int32, src []byte) int32 { +func (e *fastBase) matchlenNoHist(s, t int32, src []byte) int32 { // Extend the match to be as long as possible. return int32(matchLen(src[s:], src[t:])) } -func (e *fastEncoder) matchlen(s, t int32, src []byte) int32 { - if debug { +func (e *fastBase) matchlen(s, t int32, src []byte) int32 { + if debugAsserts { if s < 0 { - panic("s<0") + err := fmt.Sprintf("s (%d) < 0", s) + panic(err) } if t < 0 { - panic("t<0") + err := fmt.Sprintf("t (%d) < 0", t) + panic(err) } if s-t > e.maxMatchOff { - panic(s - t) + err := fmt.Sprintf("s (%d) - t (%d) > maxMatchOff (%d)", s, t, e.maxMatchOff) + panic(err) + } + if len(src)-int(s) > maxCompressedBlockSize { + panic(fmt.Sprintf("len(src)-s (%d) > maxCompressedBlockSize (%d)", len(src)-int(s), maxCompressedBlockSize)) } - } - s1 := int(s) + maxMatchLength - 4 - if s1 > len(src) { - s1 = len(src) + } } // Extend the match to be as long as possible. - return int32(matchLen(src[s:s1], src[t:])) + return int32(matchLen(src[s:], src[t:])) } // Reset the encoding table. -func (e *fastEncoder) Reset() { +func (e *fastBase) Reset(singleBlock bool) { if e.blk == nil { e.blk = &blockEnc{} e.blk.init() @@ -642,7 +738,7 @@ } else { e.crc.Reset() } - if cap(e.hist) < int(e.maxMatchOff*2) { + if !singleBlock && cap(e.hist) < int(e.maxMatchOff*2) { l := e.maxMatchOff * 2 // Make it at least 1MB. if l < 1<<20 { @@ -650,7 +746,10 @@ } e.hist = make([]byte, 0, l) } - // We offset current position so everything will be out of reach - e.cur += e.maxMatchOff + int32(len(e.hist)) + // We offset current position so everything will be out of reach. + // If e.cur is above the reset line, history will be purged. + if e.cur < bufferReset { + e.cur += e.maxMatchOff + int32(len(e.hist)) + } e.hist = e.hist[:0] } diff --git a/redshiftsink/vendor/github.com/klauspost/compress/zstd/enc_params.go b/redshiftsink/vendor/github.com/klauspost/compress/zstd/enc_params.go index b6779ecb6..d874116f7 100644 --- a/redshiftsink/vendor/github.com/klauspost/compress/zstd/enc_params.go +++ b/redshiftsink/vendor/github.com/klauspost/compress/zstd/enc_params.go @@ -4,6 +4,8 @@ package zstd +/* +// encParams are not really used, just here for reference. type encParams struct { // largest match distance : larger == more compression, more memory needed during decompression windowLog uint8 @@ -152,3 +154,4 @@ var defEncParams = [4][]encParams{ {14, 15, 15, 10, 3, 999, strategyBtultra2}, // level 22.
}, } +*/ diff --git a/redshiftsink/vendor/github.com/klauspost/compress/zstd/encoder.go b/redshiftsink/vendor/github.com/klauspost/compress/zstd/encoder.go index 366dd66bd..c56d2241f 100644 --- a/redshiftsink/vendor/github.com/klauspost/compress/zstd/encoder.go +++ b/redshiftsink/vendor/github.com/klauspost/compress/zstd/encoder.go @@ -35,21 +35,22 @@ type encoder interface { AppendCRC([]byte) []byte WindowSize(size int) int32 UseBlock(*blockEnc) - Reset() + Reset(singleBlock bool) } type encoderState struct { - w io.Writer - filling []byte - current []byte - previous []byte - encoder encoder - writing *blockEnc - err error - writeErr error - nWritten int64 - headerWritten bool - eofWritten bool + w io.Writer + filling []byte + current []byte + previous []byte + encoder encoder + writing *blockEnc + err error + writeErr error + nWritten int64 + headerWritten bool + eofWritten bool + fullFrameWritten bool // This waitgroup indicates an encode is running. wg sync.WaitGroup @@ -71,27 +72,26 @@ func NewWriter(w io.Writer, opts ...EOption) (*Encoder, error) { } if w != nil { e.Reset(w) - } else { - e.init.Do(func() { - e.initialize() - }) } return &e, nil } func (e *Encoder) initialize() { + if e.o.concurrent == 0 { + e.o.setDefault() + } e.encoders = make(chan encoder, e.o.concurrent) for i := 0; i < e.o.concurrent; i++ { - e.encoders <- e.o.encoder() + enc := e.o.encoder() + // If not single block, history will be allocated on first use. + enc.Reset(true) + e.encoders <- enc } } // Reset will re-initialize the writer and new writes will encode to the supplied writer // as a new, independent stream. func (e *Encoder) Reset(w io.Writer) { - e.init.Do(func() { - e.initialize() - }) s := &e.state s.wg.Wait() s.wWg.Wait() @@ -115,9 +115,10 @@ func (e *Encoder) Reset(w io.Writer) { s.filling = s.filling[:0] s.current = s.current[:0] s.previous = s.previous[:0] - s.encoder.Reset() + s.encoder.Reset(false) s.headerWritten = false s.eofWritten = false + s.fullFrameWritten = false s.w = w s.err = nil s.nWritten = 0 @@ -156,7 +157,7 @@ func (e *Encoder) Write(p []byte) (n int, err error) { if err != nil { return n, err } - if debug && len(s.filling) > 0 { + if debugAsserts && len(s.filling) > 0 { panic(len(s.filling)) } } @@ -176,6 +177,22 @@ func (e *Encoder) nextBlock(final bool) error { return fmt.Errorf("block > maxStoreBlockSize") } if !s.headerWritten { + // If we have a single block encode, do a sync compression. + if final && len(s.filling) > 0 { + s.current = e.EncodeAll(s.filling, s.current[:0]) + var n2 int + n2, s.err = s.w.Write(s.current) + if s.err != nil { + return s.err + } + s.nWritten += int64(n2) + s.current = s.current[:0] + s.filling = s.filling[:0] + s.headerWritten = true + s.fullFrameWritten = true + return nil + } + var tmp [maxHeaderSize]byte fh := frameHeader{ ContentSize: 0, @@ -263,7 +280,7 @@ func (e *Encoder) nextBlock(final bool) error { // If we got the exact same number of literals as input, // assume the literals cannot be compressed. if len(src) != len(blk.literals) || len(src) != e.o.blockSize { - err = blk.encode(e.o.noEntropy) + err = blk.encode(e.o.noEntropy, !e.o.allLitEntropy) } switch err { case errIncompressible: @@ -298,7 +315,9 @@ func (e *Encoder) ReadFrom(r io.Reader) (n int64, err error) { src := e.state.filling for { n2, err := r.Read(src) - _, _ = e.state.encoder.CRC().Write(src[:n2]) + if e.o.crc { + _, _ = e.state.encoder.CRC().Write(src[:n2]) + } // src is now the unfilled part... 
src = src[n2:] n += int64(n2) @@ -363,6 +382,9 @@ func (e *Encoder) Close() error { if err != nil { return err } + if e.state.fullFrameWritten { + return s.err + } s.wg.Wait() s.wWg.Wait() @@ -422,18 +444,14 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte { } return dst } - e.init.Do(func() { - e.o.setDefault() - e.initialize() - }) + e.init.Do(e.initialize) enc := <-e.encoders defer func() { // Release encoder reference to last block. - enc.Reset() + // If a non-single block is needed the encoder will reset again. + enc.Reset(true) e.encoders <- enc }() - enc.Reset() - blk := enc.Block() // Use single segments when above minimum window and below 1MB. single := len(src) < 1<<20 && len(src) > MinWindowSize if e.o.single != nil { @@ -456,12 +474,13 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte { panic(err) } - if len(src) <= e.o.blockSize && len(src) <= maxBlockSize { + // If we can do everything in one block, prefer that. + if len(src) <= maxCompressedBlockSize { // Slightly faster with no history and everything in one block. if e.o.crc { _, _ = enc.CRC().Write(src) } - blk.reset(nil) + blk := enc.Block() blk.last = true enc.EncodeNoHist(blk, src) @@ -472,7 +491,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte { if len(blk.literals) != len(src) || len(src) != e.o.blockSize { // Output directly to dst blk.output = dst - err = blk.encode(e.o.noEntropy) + err = blk.encode(e.o.noEntropy, !e.o.allLitEntropy) } switch err { @@ -488,6 +507,8 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte { } blk.output = oldout } else { + enc.Reset(false) + blk := enc.Block() for len(src) > 0 { todo := src if len(todo) > e.o.blockSize { @@ -507,7 +528,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte { // If we got the exact same number of literals as input, // assume the literals cannot be compressed. if len(blk.literals) != len(todo) || len(todo) != e.o.blockSize { - err = blk.encode(e.o.noEntropy) + err = blk.encode(e.o.noEntropy, !e.o.allLitEntropy) } switch err { diff --git a/redshiftsink/vendor/github.com/klauspost/compress/zstd/encoder_options.go b/redshiftsink/vendor/github.com/klauspost/compress/zstd/encoder_options.go index 40eb45733..dfac14ddd 100644 --- a/redshiftsink/vendor/github.com/klauspost/compress/zstd/encoder_options.go +++ b/redshiftsink/vendor/github.com/klauspost/compress/zstd/encoder_options.go @@ -12,15 +12,18 @@ type EOption func(*encoderOptions) error // options retains accumulated state of multiple options. 
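With the `fullFrameWritten` path above, a stream whose whole payload arrives before the header is written gets compressed synchronously through `EncodeAll`. For payloads already in memory it is simplest to call `EncodeAll` directly on a shared encoder, which is safe for concurrent use; a usage sketch:

```go
package main

import (
	"fmt"

	"github.com/klauspost/compress/zstd"
)

// A writer-less encoder can be shared and reused for stateless encodes.
var encoder, _ = zstd.NewWriter(nil)

// compress performs a one-shot, allocation-friendly encode.
func compress(src []byte) []byte {
	return encoder.EncodeAll(src, nil)
}

func main() {
	out := compress([]byte("hello hello hello hello"))
	fmt.Println(len(out), "compressed bytes")
}
```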
type encoderOptions struct { - concurrent int - crc bool - single *bool - pad int - blockSize int - windowSize int - level EncoderLevel - fullZero bool - noEntropy bool + concurrent int + level EncoderLevel + single *bool + pad int + blockSize int + windowSize int + crc bool + fullZero bool + noEntropy bool + allLitEntropy bool + customWindow bool + customALEntropy bool } func (o *encoderOptions) setDefault() { @@ -30,7 +33,7 @@ crc: true, single: nil, blockSize: 1 << 16, - windowSize: 1 << 22, + windowSize: 8 << 20, level: SpeedDefault, } } @@ -39,9 +42,11 @@ func (o encoderOptions) encoder() encoder { switch o.level { case SpeedDefault: - return &doubleFastEncoder{fastEncoder: fastEncoder{maxMatchOff: int32(o.windowSize)}} + return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize)}}} + case SpeedBetterCompression: + return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize)}} case SpeedFastest: - return &fastEncoder{maxMatchOff: int32(o.windowSize)} + return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize)}} } panic("unknown compression level") } @@ -67,7 +72,7 @@ func WithEncoderConcurrency(n int) EOption { } // WithWindowSize will set the maximum allowed back-reference distance. -// The value must be a power of two between WindowSizeMin and WindowSizeMax. +// The value must be a power of two between MinWindowSize and MaxWindowSize. // A larger value will enable better compression but allocate more memory and, // for above-default values, take considerably longer. // The default value is determined by the compression level. @@ -83,6 +88,7 @@ func WithWindowSize(n int) EOption { } o.windowSize = n + o.customWindow = true if o.blockSize > o.windowSize { o.blockSize = o.windowSize } @@ -130,18 +136,18 @@ const ( // This is roughly equivalent to the default Zstandard mode (level 3). SpeedDefault + // SpeedBetterCompression will yield better compression than the default. + // Currently it is about zstd level 7-8 with ~ 2x-3x the default CPU usage. + // Note that the CPU usage of this mode may increase in future versions. + SpeedBetterCompression + // speedLast should be kept as the last actual compression option. // This is not for external usage, but is used to keep track of the valid options. speedLast - // SpeedBetterCompression will (in the future) yield better compression than the default, - // but at approximately 4x the CPU usage of the default. - // For now this is not implemented. - SpeedBetterCompression = SpeedDefault - // SpeedBestCompression will choose the best available compression option. // For now this is not implemented.
- SpeedBestCompression = SpeedDefault + SpeedBestCompression = SpeedBetterCompression ) // EncoderLevelFromString will convert a string representation of an encoding level back @@ -163,8 +169,10 @@ func EncoderLevelFromZstd(level int) EncoderLevel { switch { case level < 3: return SpeedFastest - case level >= 3: + case level >= 3 && level < 6: return SpeedDefault + case level > 5: + return SpeedBetterCompression } return SpeedDefault } @@ -176,6 +184,8 @@ func (e EncoderLevel) String() string { return "fastest" case SpeedDefault: return "default" + case SpeedBetterCompression: + return "better" default: return "invalid" } @@ -189,6 +199,20 @@ func WithEncoderLevel(l EncoderLevel) EOption { return fmt.Errorf("unknown encoder level") } o.level = l + if !o.customWindow { + switch o.level { + case SpeedFastest: + o.windowSize = 4 << 20 + case SpeedDefault: + o.windowSize = 8 << 20 + case SpeedBetterCompression: + o.windowSize = 16 << 20 + } + } + if !o.customALEntropy { + o.allLitEntropy = l > SpeedFastest + } + return nil } } @@ -203,6 +227,18 @@ func WithZeroFrames(b bool) EOption { } } +// WithAllLitEntropyCompression will apply entropy compression if no matches are found. +// Disabling this will skip incompressible data faster, but in cases with no matches but +// skewed character distribution compression is lost. +// Default value depends on the compression level selected. +func WithAllLitEntropyCompression(b bool) EOption { + return func(o *encoderOptions) error { + o.customALEntropy = true + o.allLitEntropy = b + return nil + } +} + // WithNoEntropyCompression will always skip entropy compression of literals. // This can be useful if content has matches, but unlikely to benefit from entropy // compression. Usually the slight speed improvement is not worth enabling this. diff --git a/redshiftsink/vendor/github.com/klauspost/compress/zstd/framedec.go b/redshiftsink/vendor/github.com/klauspost/compress/zstd/framedec.go index 40790747a..fc4a566d3 100644 --- a/redshiftsink/vendor/github.com/klauspost/compress/zstd/framedec.go +++ b/redshiftsink/vendor/github.com/klauspost/compress/zstd/framedec.go @@ -16,16 +16,11 @@ import ( ) type frameDec struct { - o decoderOptions - crc hash.Hash64 - frameDone sync.WaitGroup - offset int64 + o decoderOptions + crc hash.Hash64 + offset int64 - WindowSize uint64 - DictionaryID uint32 - FrameContentSize uint64 - HasCheckSum bool - SingleSegment bool + WindowSize uint64 // maxWindowSize is the maximum windows size to support. // should never be bigger than max-int. @@ -42,15 +37,22 @@ type frameDec struct { // Byte buffer that can be reused for small input blocks. bBuf byteBuf + FrameContentSize uint64 + frameDone sync.WaitGroup + + DictionaryID *uint32 + HasCheckSum bool + SingleSegment bool + // asyncRunning indicates whether the async routine processes input on 'decoding'. - asyncRunning bool asyncRunningMu sync.Mutex + asyncRunning bool } const ( // The minimum Window_Size is 1 KB. 
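EncoderLevelFromZstd above now distinguishes the upper zstd levels instead of folding everything at 3+ into SpeedDefault, and WithEncoderLevel applies per-level window and all-literal-entropy defaults unless the corresponding custom flags are set. A small mapping demo (EncoderLevel prints through the String method in the same hunk):

package main

import (
	"fmt"

	"github.com/klauspost/compress/zstd"
)

func main() {
	// 1-2 -> fastest, 3-5 -> default, 6+ -> better.
	for _, lvl := range []int{1, 2, 3, 5, 6, 9} {
		fmt.Printf("zstd -%d => %s\n", lvl, zstd.EncoderLevelFromZstd(lvl))
	}
}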
MinWindowSize = 1 << 10 - MaxWindowSize = 1 << 30 + MaxWindowSize = 1 << 29 ) var ( @@ -140,7 +142,7 @@ func (d *frameDec) reset(br byteBuffer) error { // Read Dictionary_ID // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary_id - d.DictionaryID = 0 + d.DictionaryID = nil if size := fhd & 3; size != 0 { if size == 3 { size = 4 @@ -152,19 +154,22 @@ func (d *frameDec) reset(br byteBuffer) error { } return io.ErrUnexpectedEOF } + var id uint32 switch size { case 1: - d.DictionaryID = uint32(b[0]) + id = uint32(b[0]) case 2: - d.DictionaryID = uint32(b[0]) | (uint32(b[1]) << 8) + id = uint32(b[0]) | (uint32(b[1]) << 8) case 4: - d.DictionaryID = uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24) + id = uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24) } if debug { - println("Dict size", size, "ID:", d.DictionaryID) + println("Dict size", size, "ID:", id) } - if d.DictionaryID != 0 { - return ErrUnknownDictionary + if id > 0 { + // ID 0 means "sorry, no dictionary anyway". + // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary-format + d.DictionaryID = &id } } @@ -231,7 +236,11 @@ func (d *frameDec) reset(br byteBuffer) error { return ErrWindowSizeTooSmall } d.history.windowSize = int(d.WindowSize) - d.history.maxSize = d.history.windowSize + maxBlockSize + if d.o.lowMem && d.history.windowSize < maxBlockSize { + d.history.maxSize = d.history.windowSize * 2 + } else { + d.history.maxSize = d.history.windowSize + maxBlockSize + } // history contains input - maybe we do something d.rawInput = br return nil @@ -318,8 +327,8 @@ func (d *frameDec) checkCRC() error { func (d *frameDec) initAsync() { if !d.o.lowMem && !d.SingleSegment { - // set max extra size history to 20MB. - d.history.maxSize = d.history.windowSize + maxBlockSize*10 + // set max extra size history to 10MB. + d.history.maxSize = d.history.windowSize + maxBlockSize*5 } // re-alloc if more than one extra block size. if d.o.lowMem && cap(d.history.b) > d.history.maxSize+maxBlockSize { @@ -345,8 +354,6 @@ func (d *frameDec) initAsync() { // When the frame has finished decoding the *bufio.Reader // containing the remaining input will be sent on frameDec.frameDone. func (d *frameDec) startDecoder(output chan decodeOutput) { - // TODO: Init to dictionary - d.history.reset() written := int64(0) defer func() { @@ -439,8 +446,6 @@ func (d *frameDec) startDecoder(output chan decodeOutput) { // runDecoder will create a sync decoder that will decode a block of data. func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) { - // TODO: Init to dictionary - d.history.reset() saved := d.history.b // We use the history for output to avoid copying it. 
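On the decode side the hunks above make DictionaryID a *uint32 so frames declaring a dictionary are surfaced instead of rejected with ErrUnknownDictionary, halve the hard window cap to 1<<29, and size low-memory history off the actual window. A sketch of the public knobs this feeds; both options exist in the bumped library:

package main

import (
	"bytes"
	"fmt"
	"io/ioutil"

	"github.com/klauspost/compress/zstd"
)

func main() {
	var buf bytes.Buffer
	enc, _ := zstd.NewWriter(&buf)
	enc.Write([]byte("hello, frame"))
	enc.Close()

	// Lowmem keeps history close to the window size; MaxMemory caps how
	// large a window a frame may demand of this decoder.
	dec, err := zstd.NewReader(&buf,
		zstd.WithDecoderLowmem(true),
		zstd.WithDecoderMaxMemory(64<<20),
	)
	if err != nil {
		panic(err)
	}
	defer dec.Close()
	out, _ := ioutil.ReadAll(dec)
	fmt.Println(string(out))
}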
diff --git a/redshiftsink/vendor/github.com/klauspost/compress/zstd/fse_decoder.go b/redshiftsink/vendor/github.com/klauspost/compress/zstd/fse_decoder.go index 9efe34feb..e6d3d49b3 100644 --- a/redshiftsink/vendor/github.com/klauspost/compress/zstd/fse_decoder.go +++ b/redshiftsink/vendor/github.com/klauspost/compress/zstd/fse_decoder.go @@ -19,7 +19,7 @@ const ( * Increasing memory usage improves compression ratio * Reduced memory usage can improve speed, due to cache effect * Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */ - maxMemoryUsage = 11 + maxMemoryUsage = tablelogAbsoluteMax + 2 maxTableLog = maxMemoryUsage - 2 maxTablesize = 1 << maxTableLog @@ -55,7 +55,7 @@ func (s *fseDecoder) readNCount(b *byteReader, maxSymbol uint16) error { if b.remain() < 4 { return errors.New("input too small") } - bitStream := b.Uint32() + bitStream := b.Uint32NC() nbBits := uint((bitStream & 0xF) + minTablelog) // extract tableLog if nbBits > tablelogAbsoluteMax { println("Invalid tablelog:", nbBits) @@ -79,7 +79,8 @@ func (s *fseDecoder) readNCount(b *byteReader, maxSymbol uint16) error { n0 += 24 if r := b.remain(); r > 5 { b.advance(2) - bitStream = b.Uint32() >> bitCount + // The check above should make sure we can read 32 bits + bitStream = b.Uint32NC() >> bitCount } else { // end of bit stream bitStream >>= 16 @@ -104,10 +105,11 @@ func (s *fseDecoder) readNCount(b *byteReader, maxSymbol uint16) error { charnum++ } - if r := b.remain(); r >= 7 || r+int(bitCount>>3) >= 4 { + if r := b.remain(); r >= 7 || r-int(bitCount>>3) >= 4 { b.advance(bitCount >> 3) bitCount &= 7 - bitStream = b.Uint32() >> bitCount + // The check above should make sure we can read 32 bits + bitStream = b.Uint32NC() >> bitCount } else { bitStream >>= 2 } @@ -118,7 +120,7 @@ func (s *fseDecoder) readNCount(b *byteReader, maxSymbol uint16) error { if int32(bitStream)&(threshold-1) < max { count = int32(bitStream) & (threshold - 1) - if debug && nbBits < 1 { + if debugAsserts && nbBits < 1 { panic("nbBits underflow") } bitCount += nbBits - 1 @@ -148,17 +150,16 @@ func (s *fseDecoder) readNCount(b *byteReader, maxSymbol uint16) error { threshold >>= 1 } - //println("b.off:", b.off, "len:", len(b.b), "bc:", bitCount, "remain:", b.remain()) - if r := b.remain(); r >= 7 || r+int(bitCount>>3) >= 4 { + if r := b.remain(); r >= 7 || r-int(bitCount>>3) >= 4 { b.advance(bitCount >> 3) bitCount &= 7 + // The check above should make sure we can read 32 bits + bitStream = b.Uint32NC() >> (bitCount & 31) } else { bitCount -= (uint)(8 * (len(b.b) - 4 - b.off)) b.off = len(b.b) - 4 - //println("b.off:", b.off, "len:", len(b.b), "bc:", bitCount, "iend", iend) + bitStream = b.Uint32() >> (bitCount & 31) } - bitStream = b.Uint32() >> (bitCount & 31) - //printf("bitstream is now: 0b%b", bitStream) } s.symbolLen = charnum if s.symbolLen <= 1 { diff --git a/redshiftsink/vendor/github.com/klauspost/compress/zstd/fse_encoder.go b/redshiftsink/vendor/github.com/klauspost/compress/zstd/fse_encoder.go index 619836f52..aa9eba88b 100644 --- a/redshiftsink/vendor/github.com/klauspost/compress/zstd/fse_encoder.go +++ b/redshiftsink/vendor/github.com/klauspost/compress/zstd/fse_encoder.go @@ -327,7 +327,7 @@ func (s *fseEncoder) normalizeCount(length int) error { if err != nil { return err } - if debug { + if debugAsserts { err = s.validateNorm() if err != nil { return err @@ -336,7 +336,7 @@ func (s *fseEncoder) normalizeCount(length int) error { return s.buildCTable() } s.norm[largest] += stillToDistribute - if debug { 
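The readNCount changes are correctness fixes, not cleanups: the remaining-byte guard added the consumed bit count where it must be subtracted, and the unchecked Uint32NC read is only taken once the guard proves four readable bytes remain. A standalone sketch of the corrected predicate (canReadUint32 is a hypothetical name):

package main

import "fmt"

// canReadUint32 mirrors the guard above: after discarding bitCount/8
// whole bytes, at least 4 bytes must still be readable.
func canReadUint32(remain int, bitCount uint) bool {
	return remain >= 7 || remain-int(bitCount>>3) >= 4
}

func main() {
	// The old "remain+..." form passed here and then read past the end.
	fmt.Println(canReadUint32(3, 16)) // false: 3-2 < 4
	fmt.Println(canReadUint32(6, 16)) // true:  6-2 >= 4
}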
+ if debugAsserts { err := s.validateNorm() if err != nil { return err @@ -619,7 +619,7 @@ func (s *fseEncoder) writeCount(out []byte) ([]byte, error) { func (s *fseEncoder) bitCost(symbolValue uint8, accuracyLog uint32) uint32 { minNbBits := s.ct.symbolTT[symbolValue].deltaNbBits >> 16 threshold := (minNbBits + 1) << 16 - if debug { + if debugAsserts { if !(s.actualTableLog < 16) { panic("!s.actualTableLog < 16") } @@ -633,7 +633,7 @@ func (s *fseEncoder) bitCost(symbolValue uint8, accuracyLog uint32) uint32 { // linear interpolation (very approximate) normalizedDeltaFromThreshold := (deltaFromThreshold << accuracyLog) >> s.actualTableLog bitMultiplier := uint32(1) << accuracyLog - if debug { + if debugAsserts { if s.ct.symbolTT[symbolValue].deltaNbBits+tableSize > threshold { panic("s.ct.symbolTT[symbolValue].deltaNbBits+tableSize > threshold") } diff --git a/redshiftsink/vendor/github.com/klauspost/compress/zstd/history.go b/redshiftsink/vendor/github.com/klauspost/compress/zstd/history.go index e8c419bd5..f418f50fc 100644 --- a/redshiftsink/vendor/github.com/klauspost/compress/zstd/history.go +++ b/redshiftsink/vendor/github.com/klauspost/compress/zstd/history.go @@ -17,6 +17,7 @@ type history struct { windowSize int maxSize int error bool + dict *dict } // reset will reset the history to initial state of a frame. @@ -36,12 +37,27 @@ func (h *history) reset() { } h.decoders = sequenceDecs{} if h.huffTree != nil { - huffDecoderPool.Put(h.huffTree) + if h.dict == nil || h.dict.litDec != h.huffTree { + huffDecoderPool.Put(h.huffTree) + } } h.huffTree = nil + h.dict = nil //printf("history created: %+v (l: %d, c: %d)", *h, len(h.b), cap(h.b)) } +func (h *history) setDict(dict *dict) { + if dict == nil { + return + } + h.dict = dict + h.decoders.litLengths = dict.llDec + h.decoders.offsets = dict.ofDec + h.decoders.matchLengths = dict.mlDec + h.recentOffsets = dict.offsets + h.huffTree = dict.litDec +} + // append bytes to history. // This function will make sure there is space for it, // if the buffer has been allocated with enough extra space. diff --git a/redshiftsink/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s b/redshiftsink/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s index d580e32ae..2c9c5357a 100644 --- a/redshiftsink/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s +++ b/redshiftsink/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s @@ -179,13 +179,13 @@ TEXT ·writeBlocks(SB), NOSPLIT, $0-40 MOVQ ·prime2v(SB), R14 // Load slice. - MOVQ b_base+8(FP), CX - MOVQ b_len+16(FP), DX + MOVQ arg1_base+8(FP), CX + MOVQ arg1_len+16(FP), DX LEAQ (CX)(DX*1), BX SUBQ $32, BX // Load vN from d. - MOVQ d+0(FP), AX + MOVQ arg+0(FP), AX MOVQ 0(AX), R8 // v1 MOVQ 8(AX), R9 // v2 MOVQ 16(AX), R10 // v3 @@ -209,7 +209,7 @@ blockLoop: MOVQ R11, 24(AX) // The number of bytes written is CX minus the old base pointer. 
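history.setDict above is where a parsed dictionary's huff table, FSE decoders, and repeat offsets get loaded into fresh frame state. At the API level that surfaces as decoder dictionaries; a hedged sketch where loadDict is a hypothetical helper and WithDecoderDicts is the real option:

package main

import (
	"github.com/klauspost/compress/zstd"
)

// loadDict is a hypothetical helper; a real dictionary would come from
// something like `zstd --train` over representative samples.
func loadDict() []byte { return nil }

func main() {
	var opts []zstd.DOption
	if d := loadDict(); len(d) > 0 {
		opts = append(opts, zstd.WithDecoderDicts(d))
	}
	// A nil reader is fine when the decoder is only used via DecodeAll.
	dec, err := zstd.NewReader(nil, opts...)
	if err != nil {
		panic(err)
	}
	defer dec.Close()
	// dec.DecodeAll(frame, nil) now resolves frames whose Dictionary_ID
	// matches a registered dictionary instead of failing outright.
}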
- SUBQ b_base+8(FP), CX + SUBQ arg1_base+8(FP), CX MOVQ CX, ret+32(FP) RET diff --git a/redshiftsink/vendor/github.com/klauspost/compress/zstd/seqdec.go b/redshiftsink/vendor/github.com/klauspost/compress/zstd/seqdec.go index 15a45f7b5..7ff870400 100644 --- a/redshiftsink/vendor/github.com/klauspost/compress/zstd/seqdec.go +++ b/redshiftsink/vendor/github.com/klauspost/compress/zstd/seqdec.go @@ -62,8 +62,10 @@ type sequenceDecs struct { matchLengths sequenceDec prevOffset [3]int hist []byte + dict []byte literals []byte out []byte + windowSize int maxBits uint8 } @@ -82,7 +84,12 @@ func (s *sequenceDecs) initialize(br *bitReader, hist *history, literals, out [] s.hist = hist.b s.prevOffset = hist.recentOffsets s.maxBits = s.litLengths.fse.maxBits + s.offsets.fse.maxBits + s.matchLengths.fse.maxBits + s.windowSize = hist.windowSize s.out = out + s.dict = nil + if hist.dict != nil { + s.dict = hist.dict.content + } return nil } @@ -98,23 +105,78 @@ func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error { printf("reading sequence %d, exceeded available data\n", seqs-i) return io.ErrUnexpectedEOF } - var litLen, matchOff, matchLen int + var ll, mo, ml int if br.off > 4+((maxOffsetBits+16+16)>>3) { - litLen, matchOff, matchLen = s.nextFast(br, llState, mlState, ofState) + // inlined function: + // ll, mo, ml = s.nextFast(br, llState, mlState, ofState) + + // Final will not read from stream. + var llB, mlB, moB uint8 + ll, llB = llState.final() + ml, mlB = mlState.final() + mo, moB = ofState.final() + + // extra bits are stored in reverse order. + br.fillFast() + mo += br.getBits(moB) + if s.maxBits > 32 { + br.fillFast() + } + ml += br.getBits(mlB) + ll += br.getBits(llB) + + if moB > 1 { + s.prevOffset[2] = s.prevOffset[1] + s.prevOffset[1] = s.prevOffset[0] + s.prevOffset[0] = mo + } else { + // mo = s.adjustOffset(mo, ll, moB) + // Inlined for rather big speedup + if ll == 0 { + // There is an exception though, when current sequence's literals_length = 0. + // In this case, repeated offsets are shifted by one, so an offset_value of 1 means Repeated_Offset2, + // an offset_value of 2 means Repeated_Offset3, and an offset_value of 3 means Repeated_Offset1 - 1_byte. 
+ mo++ + } + + if mo == 0 { + mo = s.prevOffset[0] + } else { + var temp int + if mo == 3 { + temp = s.prevOffset[0] - 1 + } else { + temp = s.prevOffset[mo] + } + + if temp == 0 { + // 0 is not valid; input is corrupted; force offset to 1 + println("temp was 0") + temp = 1 + } + + if mo != 1 { + s.prevOffset[2] = s.prevOffset[1] + } + s.prevOffset[1] = s.prevOffset[0] + s.prevOffset[0] = temp + mo = temp + } + } br.fillFast() } else { - litLen, matchOff, matchLen = s.next(br, llState, mlState, ofState) + ll, mo, ml = s.next(br, llState, mlState, ofState) br.fill() } if debugSequences { - println("Seq", seqs-i-1, "Litlen:", litLen, "matchOff:", matchOff, "(abs) matchLen:", matchLen) + println("Seq", seqs-i-1, "Litlen:", ll, "mo:", mo, "(abs) ml:", ml) } - if litLen > len(s.literals) { - return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", litLen, len(s.literals)) + if ll > len(s.literals) { + return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, len(s.literals)) } - size := litLen + matchLen + len(s.out) + size := ll + ml + len(s.out) if size-startSize > maxBlockSize { return fmt.Errorf("output (%d) bigger than max block size", size) } @@ -125,49 +187,70 @@ func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error { s.out = append(s.out, make([]byte, maxBlockSize)...) s.out = s.out[:len(s.out)-maxBlockSize] } - if matchLen > maxMatchLen { - return fmt.Errorf("match len (%d) bigger than max allowed length", matchLen) - } - if matchOff > len(s.out)+len(hist)+litLen { - return fmt.Errorf("match offset (%d) bigger than current history (%d)", matchOff, len(s.out)+len(hist)+litLen) - } - if matchOff == 0 && matchLen > 0 { - return fmt.Errorf("zero matchoff and matchlen > 0") + if ml > maxMatchLen { + return fmt.Errorf("match len (%d) bigger than max allowed length", ml) } - s.out = append(s.out, s.literals[:litLen]...) - s.literals = s.literals[litLen:] + // Add literals + s.out = append(s.out, s.literals[:ll]...) + s.literals = s.literals[ll:] out := s.out + if mo > len(s.out)+len(hist) || mo > s.windowSize { + if len(s.dict) == 0 { + return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(s.out)+len(hist)) + } + + // we may be in dictionary. + dictO := len(s.dict) - (mo - (len(s.out) + len(hist))) + if dictO < 0 || dictO >= len(s.dict) { + return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(s.out)+len(hist)) + } + end := dictO + ml + if end > len(s.dict) { + out = append(out, s.dict[dictO:]...) + mo -= len(s.dict) - dictO + ml -= len(s.dict) - dictO + } else { + out = append(out, s.dict[dictO:end]...) + mo = 0 + ml = 0 + } + } + + if mo == 0 && ml > 0 { + return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml) + } + // Copy from history. // TODO: Blocks without history could be made to ignore this completely. - if v := matchOff - len(s.out); v > 0 { + if v := mo - len(s.out); v > 0 { // v is the start position in history from end. start := len(s.hist) - v - if matchLen > v { + if ml > v { // Some goes into current block. // Copy remainder of history out = append(out, s.hist[start:]...) - matchOff -= v - matchLen -= v + mo -= v + ml -= v } else { - out = append(out, s.hist[start:start+matchLen]...) - matchLen = 0 + out = append(out, s.hist[start:start+ml]...) 
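The branch above inlines nextFast and adjustOffset for speed in the hot loop; since the repeat-offset shuffle is the subtle part, here is the same rule as a standalone sketch (adjustOffset mirrors the inlined code, the values are illustrative):

package main

import "fmt"

// adjustOffset mirrors the inlined logic: with litLen == 0 the repeat
// codes shift by one, code 0 reuses the most recent offset unchanged,
// code 3 means "most recent offset minus one", and any chosen offset
// is rotated to the front of the recent-offsets list.
func adjustOffset(prev *[3]int, mo, ll int) int {
	if ll == 0 {
		mo++
	}
	if mo == 0 {
		return prev[0]
	}
	temp := prev[0] - 1 // mo == 3
	if mo != 3 {
		temp = prev[mo]
	}
	if temp == 0 {
		temp = 1 // corrupt input; force a valid offset
	}
	if mo != 1 {
		prev[2] = prev[1]
	}
	prev[1] = prev[0]
	prev[0] = temp
	return temp
}

func main() {
	prev := [3]int{8, 4, 2}
	fmt.Println(adjustOffset(&prev, 1, 5), prev) // 4 [4 8 2]
	fmt.Println(adjustOffset(&prev, 1, 0), prev) // 2 [2 4 8]
}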
+ ml = 0 } } // We must be in current buffer now - if matchLen > 0 { - start := len(s.out) - matchOff - if matchLen <= len(s.out)-start { + if ml > 0 { + start := len(s.out) - mo + if ml <= len(s.out)-start { // No overlap - out = append(out, s.out[start:start+matchLen]...) + out = append(out, s.out[start:start+ml]...) } else { // Overlapping copy // Extend destination slice and copy one byte at the time. - out = out[:len(out)+matchLen] - src := out[start : start+matchLen] + out = out[:len(out)+ml] + src := out[start : start+ml] // Destination is the space we just added. - dst := out[len(out)-matchLen:] + dst := out[len(out)-ml:] dst = dst[:len(src)] for i := range src { dst[i] = src[i] diff --git a/redshiftsink/vendor/github.com/klauspost/compress/zstd/snappy.go b/redshiftsink/vendor/github.com/klauspost/compress/zstd/snappy.go index 356956ba2..690428cd2 100644 --- a/redshiftsink/vendor/github.com/klauspost/compress/zstd/snappy.go +++ b/redshiftsink/vendor/github.com/klauspost/compress/zstd/snappy.go @@ -178,7 +178,7 @@ func (r *SnappyConverter) Convert(in io.Reader, w io.Writer) (int64, error) { r.err = ErrSnappyCorrupt return written, r.err } - err = r.block.encode(false) + err = r.block.encode(false, false) switch err { case errIncompressible: r.block.popOffsets() diff --git a/redshiftsink/vendor/github.com/klauspost/compress/zstd/zstd.go b/redshiftsink/vendor/github.com/klauspost/compress/zstd/zstd.go index 57a8a2f5b..0807719c8 100644 --- a/redshiftsink/vendor/github.com/klauspost/compress/zstd/zstd.go +++ b/redshiftsink/vendor/github.com/klauspost/compress/zstd/zstd.go @@ -6,11 +6,20 @@ package zstd import ( "errors" "log" + "math" "math/bits" ) +// enable debug printing const debug = false + +// Enable extra assertions. +const debugAsserts = debug || false + +// print sequence details const debugSequences = false + +// print detailed matching information const debugMatches = false // force encoder to use predefined tables. @@ -19,6 +28,9 @@ const forcePreDef = false // zstdMinMatch is the minimum zstd match length. const zstdMinMatch = 3 +// Reset the buffer offset when reaching this. +const bufferReset = math.MaxInt32 - MaxWindowSize + var ( // ErrReservedBlockType is returned when a reserved block type is found. // Typically this indicates wrong or corrupted input. @@ -75,6 +87,17 @@ func printf(format string, a ...interface{}) { } } +// matchLenFast does matching, but will not match the last up to 7 bytes. +func matchLenFast(a, b []byte) int { + endI := len(a) & (math.MaxInt32 - 7) + for i := 0; i < endI; i += 8 { + if diff := load64(a, i) ^ load64(b, i); diff != 0 { + return i + bits.TrailingZeros64(diff)>>3 + } + } + return endI +} + // matchLen returns the maximum length. // a must be the shortest of the two. // The function also returns whether all bytes matched. @@ -85,33 +108,18 @@ func matchLen(a, b []byte) int { return i + (bits.TrailingZeros64(diff) >> 3) } } + checked := (len(a) >> 3) << 3 a = a[checked:] b = b[checked:] - // TODO: We could do a 4 check. for i := range a { if a[i] != b[i] { - return int(i) + checked + return i + checked } } return len(a) + checked } -// matchLen returns a match length in src between index s and t -func matchLenIn(src []byte, s, t int32) int32 { - s1 := len(src) - b := src[t:] - a := src[s:s1] - b = b[:len(a)] - // Extend the match to be as long as possible. 
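matchLenFast above trades the tail for speed: it compares eight bytes per iteration and deliberately undercounts by up to seven bytes in exchange for word-at-a-time compares. Since the helper is package-private, the demo below copies it verbatim together with a load64 matching the package's little-endian read:

package main

import (
	"encoding/binary"
	"fmt"
	"math"
	"math/bits"
)

func load64(b []byte, i int) uint64 {
	return binary.LittleEndian.Uint64(b[i:])
}

// matchLenFast as added above: whole 8-byte words only.
func matchLenFast(a, b []byte) int {
	endI := len(a) & (math.MaxInt32 - 7)
	for i := 0; i < endI; i += 8 {
		if diff := load64(a, i) ^ load64(b, i); diff != 0 {
			return i + bits.TrailingZeros64(diff)>>3
		}
	}
	return endI
}

func main() {
	a := []byte("0123456789abcdef-tail")
	// 21 equal bytes, but only two full words (16 bytes) are counted.
	fmt.Println(matchLenFast(a, a)) // 16
}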
- for i := range a { - if a[i] != b[i] { - return int32(i) - } - } - return int32(len(a)) -} - func load3232(b []byte, i int32) uint32 { // Help the compiler eliminate bounds checks on the read so it can be done in a single read. b = b[i:] diff --git a/redshiftsink/vendor/github.com/pierrec/lz4/block.go b/redshiftsink/vendor/github.com/pierrec/lz4/block.go index b589af467..664d9be58 100644 --- a/redshiftsink/vendor/github.com/pierrec/lz4/block.go +++ b/redshiftsink/vendor/github.com/pierrec/lz4/block.go @@ -2,8 +2,8 @@ package lz4 import ( "encoding/binary" - "fmt" "math/bits" + "sync" ) // blockHash hashes the lower 6 bytes into a value < htSize. @@ -35,24 +35,31 @@ func UncompressBlock(src, dst []byte) (int, error) { // CompressBlock compresses the source buffer into the destination one. // This is the fast version of LZ4 compression and also the default one. -// The size of hashTable must be at least 64Kb. // -// The size of the compressed data is returned. If it is 0 and no error, then the data is incompressible. +// The argument hashTable is scratch space for a hash table used by the +// compressor. If provided, it should have length at least 1<<16. If it is +// shorter (or nil), CompressBlock allocates its own hash table. +// +// The size of the compressed data is returned. +// +// If the destination buffer size is lower than CompressBlockBound and +// the compressed size is 0 and no error, then the data is incompressible. // // An error is returned if the destination buffer is too small. func CompressBlock(src, dst []byte, hashTable []int) (_ int, err error) { - if len(hashTable) < htSize { - return 0, fmt.Errorf("hash table too small, should be at least %d in size", htSize) - } defer recoverBlock(&err) + // Return 0, nil only if the destination buffer size is < CompressBlockBound. + isNotCompressible := len(dst) < CompressBlockBound(len(src)) + // adaptSkipLog sets how quickly the compressor begins skipping blocks when data is incompressible. - // This significantly speeds up incompressible data and usually has very small impact on compresssion. + // This significantly speeds up incompressible data and usually has very small impact on compression. // bytes to skip = 1 + (bytes since last match >> adaptSkipLog) const adaptSkipLog = 7 - sn, dn := len(src)-mfLimit, len(dst) - if sn <= 0 || dn == 0 { - return 0, nil + if len(hashTable) < htSize { + htIface := htPool.Get() + defer htPool.Put(htIface) + hashTable = (*(htIface).(*[htSize]int))[:] } // Prove to the compiler the table has at least htSize elements. // The compiler can see that "uint32() >> hashShift" cannot be out of bounds. @@ -61,6 +68,10 @@ func CompressBlock(src, dst []byte, hashTable []int) (_ int, err error) { // si: Current position of the search. // anchor: Position of the current literals. var si, di, anchor int + sn := len(src) - mfLimit + if sn <= 0 { + goto lastLiterals + } // Fast scan strategy: the hash table only stores the last 4 bytes sequences. for si < sn { @@ -184,7 +195,8 @@ func CompressBlock(src, dst []byte, hashTable []int) (_ int, err error) { hashTable[h] = si - 2 } - if anchor == 0 { +lastLiterals: + if isNotCompressible && anchor == 0 { // Incompressible. return 0, nil } @@ -205,7 +217,7 @@ func CompressBlock(src, dst []byte, hashTable []int) (_ int, err error) { di++ // Write the last literals. - if di >= anchor { + if isNotCompressible && di >= anchor { // Incompressible. 
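The CompressBlock rewrite above removes the hard htSize requirement: a short or nil hashTable now borrows a pooled table, and the "incompressible" zero return only applies when dst is smaller than CompressBlockBound, so a full-size destination always gets a valid block. A round-trip sketch against the public API:

package main

import (
	"bytes"
	"fmt"

	"github.com/pierrec/lz4"
)

func main() {
	src := bytes.Repeat([]byte("tipoca-stream "), 50)

	// nil hash table: the patched CompressBlock takes one from its pool.
	dst := make([]byte, lz4.CompressBlockBound(len(src)))
	n, err := lz4.CompressBlock(src, dst, nil)
	if err != nil {
		panic(err)
	}

	out := make([]byte, len(src))
	m, err := lz4.UncompressBlock(dst[:n], out)
	if err != nil {
		panic(err)
	}
	fmt.Println(m == len(src), bytes.Equal(out[:m], src)) // true true
}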
return 0, nil } @@ -213,6 +225,13 @@ func CompressBlock(src, dst []byte, hashTable []int) (_ int, err error) { return di, nil } +// Pool of hash tables for CompressBlock. +var htPool = sync.Pool{ + New: func() interface{} { + return new([htSize]int) + }, +} + // blockHash hashes 4 bytes into a value < winSize. func blockHashHC(x uint32) uint32 { const hasher uint32 = 2654435761 // Knuth multiplicative hash. @@ -224,22 +243,24 @@ func blockHashHC(x uint32) uint32 { // // CompressBlockHC compression ratio is better than CompressBlock but it is also slower. // -// The size of the compressed data is returned. If it is 0 and no error, then the data is not compressible. +// The size of the compressed data is returned. +// +// If the destination buffer size is lower than CompressBlockBound and +// the compressed size is 0 and no error, then the data is incompressible. // // An error is returned if the destination buffer is too small. func CompressBlockHC(src, dst []byte, depth int) (_ int, err error) { defer recoverBlock(&err) + // Return 0, nil only if the destination buffer size is < CompressBlockBound. + isNotCompressible := len(dst) < CompressBlockBound(len(src)) + // adaptSkipLog sets how quickly the compressor begins skipping blocks when data is incompressible. - // This significantly speeds up incompressible data and usually has very small impact on compresssion. + // This significantly speeds up incompressible data and usually has very small impact on compression. // bytes to skip = 1 + (bytes since last match >> adaptSkipLog) const adaptSkipLog = 7 - sn, dn := len(src)-mfLimit, len(dst) - if sn <= 0 || dn == 0 { - return 0, nil - } - var si, di int + var si, di, anchor int // hashTable: stores the last position found for a given hash // chainTable: stores previous positions for a given hash @@ -249,7 +270,11 @@ func CompressBlockHC(src, dst []byte, depth int) (_ int, err error) { depth = winSize } - anchor := si + sn := len(src) - mfLimit + if sn <= 0 { + goto lastLiterals + } + for si < sn { // Hash the next 4 bytes (sequence). match := binary.LittleEndian.Uint32(src[si:]) @@ -356,12 +381,13 @@ func CompressBlockHC(src, dst []byte, depth int) (_ int, err error) { } } - if anchor == 0 { + if isNotCompressible && anchor == 0 { // Incompressible. return 0, nil } // Last literals. +lastLiterals: lLen := len(src) - anchor if lLen < 0xF { dst[di] = byte(lLen << 4) @@ -378,7 +404,7 @@ func CompressBlockHC(src, dst []byte, depth int) (_ int, err error) { di++ // Write the last literals. - if di >= anchor { + if isNotCompressible && di >= anchor { // Incompressible. return 0, nil } diff --git a/redshiftsink/vendor/github.com/pierrec/lz4/lz4.go b/redshiftsink/vendor/github.com/pierrec/lz4/lz4.go index 29864d8fd..6c73539a3 100644 --- a/redshiftsink/vendor/github.com/pierrec/lz4/lz4.go +++ b/redshiftsink/vendor/github.com/pierrec/lz4/lz4.go @@ -98,7 +98,7 @@ func blockSizeValueToIndex(size int) byte { // (http://fastcompression.blogspot.com/2013/04/lz4-streaming-format-final.html). // // NB. in a Reader, in case of concatenated frames, the Header values may change between Read() calls. -// It is the caller responsibility to check them if necessary. +// It is the caller's responsibility to check them if necessary. type Header struct { BlockChecksum bool // Compressed blocks checksum flag. NoChecksum bool // Frame checksum flag. 
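CompressBlockHC gets the same restructuring: the lastLiterals label replaces the early "too small" return, so short inputs still produce a literal-only block whenever dst is sized at CompressBlockBound. A sketch; depth 0 selects the default search depth:

package main

import (
	"fmt"

	"github.com/pierrec/lz4"
)

func main() {
	src := []byte("short input that the old code reported as incompressible")
	dst := make([]byte, lz4.CompressBlockBound(len(src)))
	n, err := lz4.CompressBlockHC(src, dst, 0)
	if err != nil {
		panic(err)
	}
	// With a bound-sized dst a block is always emitted, even when it is
	// literal-only and slightly larger than the input.
	fmt.Println(n > 0)
}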
diff --git a/redshiftsink/vendor/github.com/rcrowley/go-metrics/.travis.yml b/redshiftsink/vendor/github.com/rcrowley/go-metrics/.travis.yml index aead07654..409a5b631 100644 --- a/redshiftsink/vendor/github.com/rcrowley/go-metrics/.travis.yml +++ b/redshiftsink/vendor/github.com/rcrowley/go-metrics/.travis.yml @@ -11,6 +11,8 @@ go: - "1.10" - "1.11" - "1.12" + - "1.13" + - "1.14" script: - ./validate.sh diff --git a/redshiftsink/vendor/modules.txt b/redshiftsink/vendor/modules.txt index bf7e95cc8..90a8f36c7 100644 --- a/redshiftsink/vendor/modules.txt +++ b/redshiftsink/vendor/modules.txt @@ -1,4 +1,4 @@ -# github.com/Shopify/sarama v1.26.4 +# github.com/Shopify/sarama v1.26.5-0.20200805062841-c4bb5cd3a290 ## explicit github.com/Shopify/sarama # github.com/davecgh/go-spew v1.1.1 @@ -18,27 +18,27 @@ github.com/hashicorp/go-uuid # github.com/jcmturner/gofork v1.0.0 github.com/jcmturner/gofork/encoding/asn1 github.com/jcmturner/gofork/x/crypto/pbkdf2 -# github.com/klauspost/compress v1.9.8 +# github.com/klauspost/compress v1.10.10 github.com/klauspost/compress/fse github.com/klauspost/compress/huff0 github.com/klauspost/compress/snappy github.com/klauspost/compress/zstd github.com/klauspost/compress/zstd/internal/xxhash -# github.com/pierrec/lz4 v2.4.1+incompatible +# github.com/pierrec/lz4 v2.5.2+incompatible github.com/pierrec/lz4 github.com/pierrec/lz4/internal/xxh32 # github.com/practo/klog/v2 v2.2.1 ## explicit github.com/practo/klog/v2 -# github.com/rcrowley/go-metrics v0.0.0-20190826022208-cac0b30c2563 +# github.com/rcrowley/go-metrics v0.0.0-20200313005456-10cdbea86bc0 github.com/rcrowley/go-metrics # github.com/spf13/pflag v1.0.5 ## explicit github.com/spf13/pflag -# golang.org/x/crypto v0.0.0-20200204104054-c9f3fb736b72 +# golang.org/x/crypto v0.0.0-20200510223506-06a226fb4e37 golang.org/x/crypto/md4 golang.org/x/crypto/pbkdf2 -# golang.org/x/net v0.0.0-20200202094626-16171245cfb2 +# golang.org/x/net v0.0.0-20200528225125-3c3fba18258b golang.org/x/net/internal/socks golang.org/x/net/proxy # gopkg.in/jcmturner/aescts.v1 v1.0.1
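modules.txt records the net effect of the bump: sarama moves to the sync-commit pseudo-version, klauspost/compress to v1.10.10, pierrec/lz4 to v2.5.2, plus newer go-metrics, x/crypto, and x/net. Assuming standard Go module tooling, the vendor tree can be reproduced from go.mod with:

go get github.com/Shopify/sarama@v1.26.5-0.20200805062841-c4bb5cd3a290
go mod tidy
go mod vendor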