Skip to content

Commit

Permalink
zstd: Improve decompression speed (#511)
Browse files Browse the repository at this point in the history
`copy` appears to be faster than `append`. In async streaming, where the output size is known up front, use `copy` instead of `append`.

Simplify some checks.

```
STREAM BEFORE: (best of 5)
BenchmarkDecoderEnwik9-32              4     275810800 ns/op     362.57 MB/s       53570 B/op         44 allocs/op
BenchmarkDecoderSilesia-32             3     389839700 ns/op     543.68 MB/s     2148208 B/op         44 allocs/op
BenchmarkDecoderJSON-32             1    6487052600 ns/op     967.15 MB/s      101128 B/op         68 allocs/op

STREAM AFTER: (best of 5)

BenchmarkDecoderEnwik9-32              4     252812900 ns/op     395.55 MB/s       53970 B/op         45 allocs/op
BenchmarkDecoderSilesia-32             3     360591267 ns/op     587.78 MB/s     2148309 B/op         45 allocs/op
BenchmarkDecoderJSON-32            1    5632760500 ns/op    1113.83 MB/s       63008 B/op         55 allocs/op

benchmark                                                                 old ns/op     new ns/op     delta
BenchmarkDecoder_DecoderSmall/kppkn.gtb.zst-32                            4480251       4484930       +0.10%
BenchmarkDecoder_DecoderSmall/geo.protodata.zst-32                        1024974       1021181       -0.37%
BenchmarkDecoder_DecoderSmall/plrabn12.txt.zst-32                         14753194      14762007      +0.06%
BenchmarkDecoder_DecoderSmall/lcet10.txt.zst-32                           10879473      10653569      -2.08%
BenchmarkDecoder_DecoderSmall/asyoulik.txt.zst-32                         3570259       3556231       -0.39%
BenchmarkDecoder_DecoderSmall/alice29.txt.zst-32                          4703995       4586619       -2.50%
BenchmarkDecoder_DecoderSmall/html_x_4.zst-32                             1555951       1430158       -8.08%
BenchmarkDecoder_DecoderSmall/paper-100k.pdf.zst-32                       201293        198664        -1.31%
BenchmarkDecoder_DecoderSmall/fireworks.jpeg.zst-32                       77448         77995         +0.71%
BenchmarkDecoder_DecoderSmall/urls.10K.zst-32                             12220712      12006213      -1.76%
BenchmarkDecoder_DecoderSmall/html.zst-32                                 1105131       1099679       -0.49%
BenchmarkDecoder_DecoderSmall/comp-data.bin.zst-32                        83636         82497         -1.36%
BenchmarkDecoder_DecodeAll/kppkn.gtb.zst-32                               545855        536756        -1.67%
BenchmarkDecoder_DecodeAll/geo.protodata.zst-32                           126847        124858        -1.57%
BenchmarkDecoder_DecodeAll/plrabn12.txt.zst-32                            1737341       1705736       -1.82%
BenchmarkDecoder_DecodeAll/lcet10.txt.zst-32                              1287042       1268665       -1.43%
BenchmarkDecoder_DecodeAll/asyoulik.txt.zst-32                            444045        437962        -1.37%
BenchmarkDecoder_DecodeAll/alice29.txt.zst-32                             579794        564964        -2.56%
BenchmarkDecoder_DecodeAll/html_x_4.zst-32                                242215        239961        -0.93%
BenchmarkDecoder_DecodeAll/paper-100k.pdf.zst-32                          23355         22708         -2.77%
BenchmarkDecoder_DecodeAll/fireworks.jpeg.zst-32                          9093          9109          +0.18%
BenchmarkDecoder_DecodeAll/urls.10K.zst-32                                1445517       1426542       -1.31%
BenchmarkDecoder_DecodeAll/html.zst-32                                    136453        135449        -0.74%
BenchmarkDecoder_DecodeAll/comp-data.bin.zst-32                           10382         10194         -1.81%
BenchmarkDecoder_DecodeAllFiles/.tracker-unpacked.bin/fastest-32          1503663       1420972       -5.50%
BenchmarkDecoder_DecodeAllFiles/.tracker-unpacked.bin/default-32          1394726       1336862       -4.15%
BenchmarkDecoder_DecodeAllFiles/.tracker-unpacked.bin/better-32           1238850       1206617       -2.60%
BenchmarkDecoder_DecodeAllFiles/.tracker-unpacked.bin/best-32             922146        894515        -3.00%
BenchmarkDecoder_DecodeAllFiles/.tracker.bin/fastest-32                   4511          4523          +0.27%
BenchmarkDecoder_DecodeAllFiles/.tracker.bin/default-32                   185382        183977        -0.76%
BenchmarkDecoder_DecodeAllFiles/.tracker.bin/better-32                    173345        174367        +0.59%
BenchmarkDecoder_DecodeAllFiles/.tracker.bin/best-32                      200077        200729        +0.33%
BenchmarkDecoder_DecodeAllFiles/Mark.Twain-Tom.Sawyer.txt/fastest-32      1340305       1320598       -1.47%
BenchmarkDecoder_DecodeAllFiles/Mark.Twain-Tom.Sawyer.txt/default-32      1356206       1348648       -0.56%
BenchmarkDecoder_DecodeAllFiles/Mark.Twain-Tom.Sawyer.txt/better-32       1279765       1262501       -1.35%
BenchmarkDecoder_DecodeAllFiles/Mark.Twain-Tom.Sawyer.txt/best-32         1326312       1290128       -2.73%
BenchmarkDecoder_DecodeAllFiles/e.txt/fastest-32                          7429          7409          -0.27%
BenchmarkDecoder_DecodeAllFiles/e.txt/default-32                          337217        332148        -1.50%
BenchmarkDecoder_DecodeAllFiles/e.txt/better-32                           264014        259935        -1.54%
BenchmarkDecoder_DecodeAllFiles/e.txt/best-32                             190466        190781        +0.17%
BenchmarkDecoder_DecodeAllFiles/fse-artifact3.bin/fastest-32              3195          3159          -1.13%
BenchmarkDecoder_DecodeAllFiles/fse-artifact3.bin/default-32              3096          2918          -5.75%
BenchmarkDecoder_DecodeAllFiles/fse-artifact3.bin/better-32               3564          3528          -1.01%
BenchmarkDecoder_DecodeAllFiles/fse-artifact3.bin/best-32                 9356          9213          -1.53%
BenchmarkDecoder_DecodeAllFiles/gettysburg.txt/fastest-32                 5209          5164          -0.86%
BenchmarkDecoder_DecodeAllFiles/gettysburg.txt/default-32                 6511          6430          -1.24%
BenchmarkDecoder_DecodeAllFiles/gettysburg.txt/better-32                  6484          6382          -1.57%
BenchmarkDecoder_DecodeAllFiles/gettysburg.txt/best-32                    6963          6917          -0.66%
BenchmarkDecoder_DecodeAllFiles/html.txt/fastest-32                       104999        104529        -0.45%
BenchmarkDecoder_DecodeAllFiles/html.txt/default-32                       108301        109125        +0.76%
BenchmarkDecoder_DecodeAllFiles/html.txt/better-32                        101897        103069        +1.15%
BenchmarkDecoder_DecodeAllFiles/html.txt/best-32                          111684        108225        -3.10%
BenchmarkDecoder_DecodeAllFiles/pi.txt/fastest-32                         7440          7454          +0.19%
BenchmarkDecoder_DecodeAllFiles/pi.txt/default-32                         339353        330973        -2.47%
BenchmarkDecoder_DecodeAllFiles/pi.txt/better-32                          262241        259706        -0.97%
BenchmarkDecoder_DecodeAllFiles/pi.txt/best-32                            192520        190490        -1.05%
BenchmarkDecoder_DecodeAllFiles/pngdata.bin/fastest-32                    27357         26713         -2.35%
BenchmarkDecoder_DecodeAllFiles/pngdata.bin/default-32                    30316         30122         -0.64%
BenchmarkDecoder_DecodeAllFiles/pngdata.bin/better-32                     25379         24816         -2.22%
BenchmarkDecoder_DecodeAllFiles/pngdata.bin/best-32                       30487         29783         -2.31%
BenchmarkDecoder_DecodeAllFiles/sharnd.out/fastest-32                     7491          7386          -1.40%
BenchmarkDecoder_DecodeAllFiles/sharnd.out/default-32                     7471          7469          -0.03%
BenchmarkDecoder_DecodeAllFiles/sharnd.out/better-32                      7480          7399          -1.08%
BenchmarkDecoder_DecodeAllFiles/sharnd.out/best-32                        7442          7445          +0.04%
BenchmarkDecoder_DecodeAllFilesP/.tracker-unpacked.bin/fastest-32         93914         96888         +3.17%
BenchmarkDecoder_DecodeAllFilesP/.tracker-unpacked.bin/default-32         70662         66887         -5.34%
BenchmarkDecoder_DecodeAllFilesP/.tracker-unpacked.bin/better-32          64330         60733         -5.59%
BenchmarkDecoder_DecodeAllFilesP/.tracker-unpacked.bin/best-32            46892         45656         -2.64%
BenchmarkDecoder_DecodeAllFilesP/.tracker.bin/fastest-32                  1706          1702          -0.23%
BenchmarkDecoder_DecodeAllFilesP/.tracker.bin/default-32                  10369         10491         +1.18%
BenchmarkDecoder_DecodeAllFilesP/.tracker.bin/better-32                   9685          9589          -0.99%
BenchmarkDecoder_DecodeAllFilesP/.tracker.bin/best-32                     11170         10915         -2.28%
BenchmarkDecoder_DecodeAllFilesP/Mark.Twain-Tom.Sawyer.txt/fastest-32     91486         82267         -10.08%
BenchmarkDecoder_DecodeAllFilesP/Mark.Twain-Tom.Sawyer.txt/default-32     74032         70317         -5.02%
BenchmarkDecoder_DecodeAllFilesP/Mark.Twain-Tom.Sawyer.txt/better-32      68588         66062         -3.68%
BenchmarkDecoder_DecodeAllFilesP/Mark.Twain-Tom.Sawyer.txt/best-32        71520         68268         -4.55%
BenchmarkDecoder_DecodeAllFilesP/e.txt/fastest-32                         1935          1918          -0.88%
BenchmarkDecoder_DecodeAllFilesP/e.txt/default-32                         20006         19697         -1.54%
BenchmarkDecoder_DecodeAllFilesP/e.txt/better-32                          14810         14510         -2.03%
BenchmarkDecoder_DecodeAllFilesP/e.txt/best-32                            10840         10862         +0.20%
BenchmarkDecoder_DecodeAllFilesP/fse-artifact3.bin/fastest-32             1353          1315          -2.81%
BenchmarkDecoder_DecodeAllFilesP/fse-artifact3.bin/default-32             1356          1339          -1.25%
BenchmarkDecoder_DecodeAllFilesP/fse-artifact3.bin/better-32              1283          1280          -0.23%
BenchmarkDecoder_DecodeAllFilesP/fse-artifact3.bin/best-32                1225          1216          -0.73%
BenchmarkDecoder_DecodeAllFilesP/gettysburg.txt/fastest-32                1295          1336          +3.17%
BenchmarkDecoder_DecodeAllFilesP/gettysburg.txt/default-32                1298          1283          -1.16%
BenchmarkDecoder_DecodeAllFilesP/gettysburg.txt/better-32                 1236          1255          +1.54%
BenchmarkDecoder_DecodeAllFilesP/gettysburg.txt/best-32                   1270          1207          -4.96%
BenchmarkDecoder_DecodeAllFilesP/html.txt/fastest-32                      5840          5875          +0.60%
BenchmarkDecoder_DecodeAllFilesP/html.txt/default-32                      6230          6067          -2.62%
BenchmarkDecoder_DecodeAllFilesP/html.txt/better-32                       5711          5619          -1.61%
BenchmarkDecoder_DecodeAllFilesP/html.txt/best-32                         6459          6405          -0.84%
BenchmarkDecoder_DecodeAllFilesP/pi.txt/fastest-32                        1922          1943          +1.09%
BenchmarkDecoder_DecodeAllFilesP/pi.txt/default-32                        20029         19041         -4.93%
BenchmarkDecoder_DecodeAllFilesP/pi.txt/better-32                         14729         14440         -1.96%
BenchmarkDecoder_DecodeAllFilesP/pi.txt/best-32                           10857         10861         +0.04%
BenchmarkDecoder_DecodeAllFilesP/pngdata.bin/fastest-32                   1566          1481          -5.43%
BenchmarkDecoder_DecodeAllFilesP/pngdata.bin/default-32                   1609          1616          +0.44%
BenchmarkDecoder_DecodeAllFilesP/pngdata.bin/better-32                    1351          1334          -1.26%
BenchmarkDecoder_DecodeAllFilesP/pngdata.bin/best-32                      1531          1525          -0.39%
BenchmarkDecoder_DecodeAllFilesP/sharnd.out/fastest-32                    1932          1921          -0.57%
BenchmarkDecoder_DecodeAllFilesP/sharnd.out/default-32                    1948          1947          -0.05%
BenchmarkDecoder_DecodeAllFilesP/sharnd.out/better-32                     1924          1903          -1.09%
BenchmarkDecoder_DecodeAllFilesP/sharnd.out/best-32                       1939          1892          -2.42%
BenchmarkDecoder_DecodeAllParallel/kppkn.gtb.zst-32                       34626         30036         -13.26%
BenchmarkDecoder_DecodeAllParallel/geo.protodata.zst-32                   7397          6822          -7.77%
BenchmarkDecoder_DecodeAllParallel/plrabn12.txt.zst-32                    113773        96432         -15.24%
BenchmarkDecoder_DecodeAllParallel/lcet10.txt.zst-32                      84246         72464         -13.99%
BenchmarkDecoder_DecodeAllParallel/asyoulik.txt.zst-32                    28424         24778         -12.83%
BenchmarkDecoder_DecodeAllParallel/alice29.txt.zst-32                     38002         32737         -13.85%
BenchmarkDecoder_DecodeAllParallel/html_x_4.zst-32                        13434         13014         -3.13%
BenchmarkDecoder_DecodeAllParallel/paper-100k.pdf.zst-32                  1463          1349          -7.79%
BenchmarkDecoder_DecodeAllParallel/fireworks.jpeg.zst-32                  2014          2022          +0.40%
BenchmarkDecoder_DecodeAllParallel/urls.10K.zst-32                        86544         78091         -9.77%
BenchmarkDecoder_DecodeAllParallel/html.zst-32                            8358          7631          -8.70%
BenchmarkDecoder_DecodeAllParallel/comp-data.bin.zst-32                   787           794           +0.81%
BenchmarkDecoderSilesia-32                                                426845533     382465000     -10.40%
BenchmarkDecoderEnwik9-32                                                 297446375     268355150     -9.78%

benchmark                                                                 old MB/s     new MB/s     speedup
BenchmarkDecoder_DecoderSmall/kppkn.gtb.zst-32                            329.12       328.78       1.00x
BenchmarkDecoder_DecoderSmall/geo.protodata.zst-32                        925.59       929.03       1.00x
BenchmarkDecoder_DecoderSmall/plrabn12.txt.zst-32                         261.29       261.14       1.00x
BenchmarkDecoder_DecoderSmall/lcet10.txt.zst-32                           313.80       320.46       1.02x
BenchmarkDecoder_DecoderSmall/asyoulik.txt.zst-32                         280.49       281.60       1.00x
BenchmarkDecoder_DecoderSmall/alice29.txt.zst-32                          258.66       265.27       1.03x
BenchmarkDecoder_DecoderSmall/html_x_4.zst-32                             2105.98      2291.22      1.09x
BenchmarkDecoder_DecoderSmall/paper-100k.pdf.zst-32                       4069.70      4123.55      1.01x
BenchmarkDecoder_DecoderSmall/fireworks.jpeg.zst-32                       12714.88     12625.68     0.99x
BenchmarkDecoder_DecoderSmall/urls.10K.zst-32                             459.60       467.82       1.02x
BenchmarkDecoder_DecoderSmall/html.zst-32                                 741.27       744.94       1.00x
BenchmarkDecoder_DecoderSmall/comp-data.bin.zst-32                        389.88       395.26       1.01x
BenchmarkDecoder_DecodeAll/kppkn.gtb.zst-32                               337.67       343.40       1.02x
BenchmarkDecoder_DecodeAll/geo.protodata.zst-32                           934.89       949.78       1.02x
BenchmarkDecoder_DecodeAll/plrabn12.txt.zst-32                            277.36       282.49       1.02x
BenchmarkDecoder_DecodeAll/lcet10.txt.zst-32                              331.58       336.38       1.01x
BenchmarkDecoder_DecodeAll/asyoulik.txt.zst-32                            281.91       285.82       1.01x
BenchmarkDecoder_DecodeAll/alice29.txt.zst-32                             262.32       269.20       1.03x
BenchmarkDecoder_DecodeAll/html_x_4.zst-32                                1691.06      1706.95      1.01x
BenchmarkDecoder_DecodeAll/paper-100k.pdf.zst-32                          4384.52      4509.41      1.03x
BenchmarkDecoder_DecodeAll/fireworks.jpeg.zst-32                          13536.42     13513.20     1.00x
BenchmarkDecoder_DecodeAll/urls.10K.zst-32                                485.70       492.16       1.01x
BenchmarkDecoder_DecodeAll/html.zst-32                                    750.44       756.00       1.01x
BenchmarkDecoder_DecodeAll/comp-data.bin.zst-32                           392.61       399.86       1.02x
BenchmarkDecoder_DecodeAllFiles/.tracker-unpacked.bin/fastest-32          319.22       337.80       1.06x
BenchmarkDecoder_DecodeAllFiles/.tracker-unpacked.bin/default-32          344.15       359.05       1.04x
BenchmarkDecoder_DecodeAllFiles/.tracker-unpacked.bin/better-32           387.46       397.81       1.03x
BenchmarkDecoder_DecodeAllFiles/.tracker-unpacked.bin/best-32             520.53       536.60       1.03x
BenchmarkDecoder_DecodeAllFiles/.tracker.bin/fastest-32                   13301.00     13264.44     1.00x
BenchmarkDecoder_DecodeAllFiles/.tracker.bin/default-32                   323.66       326.13       1.01x
BenchmarkDecoder_DecodeAllFiles/.tracker.bin/better-32                    346.13       344.10       0.99x
BenchmarkDecoder_DecodeAllFiles/.tracker.bin/best-32                      299.88       298.91       1.00x
BenchmarkDecoder_DecodeAllFiles/Mark.Twain-Tom.Sawyer.txt/fastest-32      289.46       293.78       1.01x
BenchmarkDecoder_DecodeAllFiles/Mark.Twain-Tom.Sawyer.txt/default-32      286.07       287.67       1.01x
BenchmarkDecoder_DecodeAllFiles/Mark.Twain-Tom.Sawyer.txt/better-32       303.15       307.30       1.01x
BenchmarkDecoder_DecodeAllFiles/Mark.Twain-Tom.Sawyer.txt/best-32         292.51       300.72       1.03x
BenchmarkDecoder_DecodeAllFiles/e.txt/fastest-32                          13462.02     13496.77     1.00x
BenchmarkDecoder_DecodeAllFiles/e.txt/default-32                          296.55       301.08       1.02x
BenchmarkDecoder_DecodeAllFiles/e.txt/better-32                           378.78       384.72       1.02x
BenchmarkDecoder_DecodeAllFiles/e.txt/best-32                             525.05       524.18       1.00x
BenchmarkDecoder_DecodeAllFiles/fse-artifact3.bin/fastest-32              1288.21      1302.89      1.01x
BenchmarkDecoder_DecodeAllFiles/fse-artifact3.bin/default-32              1329.47      1410.42      1.06x
BenchmarkDecoder_DecodeAllFiles/fse-artifact3.bin/better-32               1154.95      1166.67      1.01x
BenchmarkDecoder_DecodeAllFiles/fse-artifact3.bin/best-32                 439.95       446.75       1.02x
BenchmarkDecoder_DecodeAllFiles/gettysburg.txt/fastest-32                 297.17       299.77       1.01x
BenchmarkDecoder_DecodeAllFiles/gettysburg.txt/default-32                 237.77       240.75       1.01x
BenchmarkDecoder_DecodeAllFiles/gettysburg.txt/better-32                  238.73       242.55       1.02x
BenchmarkDecoder_DecodeAllFiles/gettysburg.txt/best-32                    222.31       223.81       1.01x
BenchmarkDecoder_DecodeAllFiles/html.txt/fastest-32                       423.59       425.50       1.00x
BenchmarkDecoder_DecodeAllFiles/html.txt/default-32                       410.68       407.58       0.99x
BenchmarkDecoder_DecodeAllFiles/html.txt/better-32                        436.49       431.53       0.99x
BenchmarkDecoder_DecodeAllFiles/html.txt/best-32                          398.24       410.97       1.03x
BenchmarkDecoder_DecodeAllFiles/pi.txt/fastest-32                         13440.83     13415.45     1.00x
BenchmarkDecoder_DecodeAllFiles/pi.txt/default-32                         294.69       302.15       1.03x
BenchmarkDecoder_DecodeAllFiles/pi.txt/better-32                          381.34       385.06       1.01x
BenchmarkDecoder_DecodeAllFiles/pi.txt/best-32                            519.44       524.98       1.01x
BenchmarkDecoder_DecodeAllFiles/pngdata.bin/fastest-32                    1871.53      1916.67      1.02x
BenchmarkDecoder_DecodeAllFiles/pngdata.bin/default-32                    1688.89      1699.76      1.01x
BenchmarkDecoder_DecodeAllFiles/pngdata.bin/better-32                     2017.44      2063.17      1.02x
BenchmarkDecoder_DecodeAllFiles/pngdata.bin/best-32                       1679.40      1719.09      1.02x
BenchmarkDecoder_DecodeAllFiles/sharnd.out/fastest-32                     13350.37     13538.80     1.01x
BenchmarkDecoder_DecodeAllFiles/sharnd.out/default-32                     13385.38     13389.11     1.00x
BenchmarkDecoder_DecodeAllFiles/sharnd.out/better-32                      13369.65     13516.52     1.01x
BenchmarkDecoder_DecodeAllFiles/sharnd.out/best-32                        13437.24     13431.84     1.00x
BenchmarkDecoder_DecodeAllFilesP/.tracker-unpacked.bin/fastest-32         5111.06      4954.15      0.97x
BenchmarkDecoder_DecodeAllFilesP/.tracker-unpacked.bin/default-32         6792.94      7176.26      1.06x
BenchmarkDecoder_DecodeAllFilesP/.tracker-unpacked.bin/better-32          7461.50      7903.48      1.06x
BenchmarkDecoder_DecodeAllFilesP/.tracker-unpacked.bin/best-32            10236.18     10513.47     1.03x
BenchmarkDecoder_DecodeAllFilesP/.tracker.bin/fastest-32                  35163.03     35254.05     1.00x
BenchmarkDecoder_DecodeAllFilesP/.tracker.bin/default-32                  5786.32      5719.16      0.99x
BenchmarkDecoder_DecodeAllFilesP/.tracker.bin/better-32                   6194.89      6257.03      1.01x
BenchmarkDecoder_DecodeAllFilesP/.tracker.bin/best-32                     5371.72      5497.26      1.02x
BenchmarkDecoder_DecodeAllFilesP/Mark.Twain-Tom.Sawyer.txt/fastest-32     4240.71      4715.91      1.11x
BenchmarkDecoder_DecodeAllFilesP/Mark.Twain-Tom.Sawyer.txt/default-32     5240.50      5517.37      1.05x
BenchmarkDecoder_DecodeAllFilesP/Mark.Twain-Tom.Sawyer.txt/better-32      5656.41      5872.77      1.04x
BenchmarkDecoder_DecodeAllFilesP/Mark.Twain-Tom.Sawyer.txt/best-32        5424.53      5682.96      1.05x
BenchmarkDecoder_DecodeAllFilesP/e.txt/fastest-32                         51685.02     52145.98     1.01x
BenchmarkDecoder_DecodeAllFilesP/e.txt/default-32                         4998.65      5077.00      1.02x
BenchmarkDecoder_DecodeAllFilesP/e.txt/better-32                          6752.55      6892.07      1.02x
BenchmarkDecoder_DecodeAllFilesP/e.txt/best-32                            9225.05      9207.07      1.00x
BenchmarkDecoder_DecodeAllFilesP/fse-artifact3.bin/fastest-32             3043.07      3129.02      1.03x
BenchmarkDecoder_DecodeAllFilesP/fse-artifact3.bin/default-32             3035.30      3074.23      1.01x
BenchmarkDecoder_DecodeAllFilesP/fse-artifact3.bin/better-32              3209.01      3214.71      1.00x
BenchmarkDecoder_DecodeAllFilesP/fse-artifact3.bin/best-32                3360.11      3383.82      1.01x
BenchmarkDecoder_DecodeAllFilesP/gettysburg.txt/fastest-32                1195.22      1158.31      0.97x
BenchmarkDecoder_DecodeAllFilesP/gettysburg.txt/default-32                1192.30      1206.40      1.01x
BenchmarkDecoder_DecodeAllFilesP/gettysburg.txt/better-32                 1252.38      1233.71      0.99x
BenchmarkDecoder_DecodeAllFilesP/gettysburg.txt/best-32                   1219.26      1282.15      1.05x
BenchmarkDecoder_DecodeAllFilesP/html.txt/fastest-32                      7615.58      7570.06      0.99x
BenchmarkDecoder_DecodeAllFilesP/html.txt/default-32                      7139.51      7330.77      1.03x
BenchmarkDecoder_DecodeAllFilesP/html.txt/better-32                       7787.61      7914.88      1.02x
BenchmarkDecoder_DecodeAllFilesP/html.txt/best-32                         6886.49      6944.28      1.01x
BenchmarkDecoder_DecodeAllFilesP/pi.txt/fastest-32                        52022.79     51480.17     0.99x
BenchmarkDecoder_DecodeAllFilesP/pi.txt/default-32                        4992.98      5251.99      1.05x
BenchmarkDecoder_DecodeAllFilesP/pi.txt/better-32                         6789.42      6925.45      1.02x
BenchmarkDecoder_DecodeAllFilesP/pi.txt/best-32                           9210.74      9207.14      1.00x
BenchmarkDecoder_DecodeAllFilesP/pngdata.bin/fastest-32                   32689.14     34582.57     1.06x
BenchmarkDecoder_DecodeAllFilesP/pngdata.bin/default-32                   31830.69     31687.49     1.00x
BenchmarkDecoder_DecodeAllFilesP/pngdata.bin/better-32                    37892.96     38369.48     1.01x
BenchmarkDecoder_DecodeAllFilesP/pngdata.bin/best-32                      33442.97     33568.58     1.00x
BenchmarkDecoder_DecodeAllFilesP/sharnd.out/fastest-32                    51761.39     52071.32     1.01x
BenchmarkDecoder_DecodeAllFilesP/sharnd.out/default-32                    51331.54     51353.05     1.00x
BenchmarkDecoder_DecodeAllFilesP/sharnd.out/better-32                     51966.35     52553.14     1.01x
BenchmarkDecoder_DecodeAllFilesP/sharnd.out/best-32                       51568.12     52848.90     1.02x
BenchmarkDecoder_DecodeAllParallel/kppkn.gtb.zst-32                       5323.13      6136.61      1.15x
BenchmarkDecoder_DecodeAllParallel/geo.protodata.zst-32                   16031.69     17383.02     1.08x
BenchmarkDecoder_DecodeAllParallel/plrabn12.txt.zst-32                    4235.29      4996.90      1.18x
BenchmarkDecoder_DecodeAllParallel/lcet10.txt.zst-32                      5065.58      5889.20      1.16x
BenchmarkDecoder_DecodeAllParallel/asyoulik.txt.zst-32                    4403.99      5051.92      1.15x
BenchmarkDecoder_DecodeAllParallel/alice29.txt.zst-32                     4002.09      4645.74      1.16x
BenchmarkDecoder_DecodeAllParallel/html_x_4.zst-32                        30489.31     31474.10     1.03x
BenchmarkDecoder_DecodeAllParallel/paper-100k.pdf.zst-32                  69969.58     75900.46     1.08x
BenchmarkDecoder_DecodeAllParallel/fireworks.jpeg.zst-32                  61103.57     60874.06     1.00x
BenchmarkDecoder_DecodeAllParallel/urls.10K.zst-32                        8112.48      8990.66      1.11x
BenchmarkDecoder_DecodeAllParallel/html.zst-32                            12252.21     13419.37     1.10x
BenchmarkDecoder_DecodeAllParallel/comp-data.bin.zst-32                   5177.63      5135.95      0.99x
BenchmarkDecoderSilesia-32                                                496.54       554.16       1.12x
BenchmarkDecoderEnwik9-32                                                 336.20       372.64       1.11x
```
  • Loading branch information
klauspost committed Mar 2, 2022
1 parent ebaaaa2 commit 3cfb088
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 45 deletions.
1 change: 1 addition & 0 deletions zstd/blockdec.go
Expand Up @@ -632,6 +632,7 @@ func (b *blockDec) decodeSequences(hist *history) error {
}
b.sequence = b.sequence[:hist.decoders.nSeqs]
if hist.decoders.nSeqs == 0 {
hist.decoders.seqSize = len(hist.decoders.literals)
return nil
}
hist.decoders.prevOffset = hist.recentOffsets
Expand Down
2 changes: 1 addition & 1 deletion zstd/decoder_test.go
Expand Up @@ -1052,7 +1052,7 @@ func testDecoderFile(t *testing.T, fn string, newDec func() (*Decoder, error)) {
}
wg.Wait()
if gotError != nil {
t.Error(err)
t.Error(gotError, err)
if err != ErrCRCMismatch {
return
}
Expand Down
100 changes: 56 additions & 44 deletions zstd/seqdec.go
Expand Up @@ -170,7 +170,9 @@ func (s *sequenceDecs) decode(seqs []seqVals) error {
br.fillFast()
} else {
if br.overread() {
printf("reading sequence %d, exceeded available data\n", i)
if debugDecoder {
printf("reading sequence %d, exceeded available data\n", i)
}
return io.ErrUnexpectedEOF
}
ll, mo, ml = s.next(br, llState, mlState, ofState)
Expand Down Expand Up @@ -248,76 +250,89 @@ func (s *sequenceDecs) execute(seqs []seqVals, hist []byte) error {
}

if debugDecoder {
printf("Execute %d seqs with hist %d, dict %d, literals: %d bytes\n", len(seqs), len(hist), len(s.dict), len(s.literals))
printf("Execute %d seqs with hist %d, dict %d, literals: %d into %d bytes\n", len(seqs), len(hist), len(s.dict), len(s.literals), s.seqSize)
}

var t = len(s.out)
out := s.out[:t+s.seqSize]

for _, seq := range seqs {
// Add literals
s.out = append(s.out, s.literals[:seq.ll]...)
copy(out[t:], s.literals[:seq.ll])
t += seq.ll
s.literals = s.literals[seq.ll:]
out := s.out

// Copy form dictionary...
if seq.mo > len(s.out)+len(hist) || seq.mo > s.windowSize {
// Copy from dictionary...
if seq.mo > t+len(hist) || seq.mo > s.windowSize {
if len(s.dict) == 0 {
return fmt.Errorf("match offset (%d) bigger than current history (%d)", seq.mo, len(s.out)+len(hist))
return fmt.Errorf("match offset (%d) bigger than current history (%d)", seq.mo, t+len(hist))
}

// we may be in dictionary.
dictO := len(s.dict) - (seq.mo - (len(s.out) + len(hist)))
dictO := len(s.dict) - (seq.mo - (t + len(hist)))
if dictO < 0 || dictO >= len(s.dict) {
return fmt.Errorf("match offset (%d) bigger than current history+dict (%d)", seq.mo, len(s.out)+len(hist)+len(s.dict))
return fmt.Errorf("match offset (%d) bigger than current history+dict (%d)", seq.mo, t+len(hist)+len(s.dict))
}
end := dictO + seq.ml
if end > len(s.dict) {
out = append(out, s.dict[dictO:]...)
seq.mo -= len(s.dict) - dictO
seq.ml -= len(s.dict) - dictO
n := len(s.dict) - dictO
copy(out[t:], s.dict[dictO:])
t += n
seq.ml -= n
} else {
s.out = append(out, s.dict[dictO:end]...)
copy(out[t:], s.dict[dictO:end])
t += end - dictO
continue
}
}

// Copy from history.
if v := seq.mo - len(s.out); v > 0 {
if v := seq.mo - t; v > 0 {
// v is the start position in history from end.
start := len(hist) - v
if seq.ml > v {
// Some goes into current block.
// Copy remainder of history
out = append(out, hist[start:]...)
seq.mo -= v
copy(out[t:], hist[start:])
t += v
seq.ml -= v
} else {
s.out = append(out, hist[start:start+seq.ml]...)
copy(out[t:], hist[start:start+seq.ml])
t += seq.ml
continue
}
}
// We must be in current buffer now
if seq.ml > 0 {
start := len(s.out) - seq.mo
if seq.ml <= len(s.out)-start {
start := t - seq.mo
if seq.ml <= t-start {
// No overlap
s.out = append(out, s.out[start:start+seq.ml]...)
copy(out[t:], out[start:start+seq.ml])
t += seq.ml
continue
} else {
// Overlapping copy
// Extend destination slice and copy one byte at a time.
out = out[:len(out)+seq.ml]
src := out[start : start+seq.ml]
// Destination is the space we just added.
dst := out[len(out)-seq.ml:]
dst := out[t:]
dst = dst[:len(src)]
t += len(src)
// Destination is the space we just added.
for i := range src {
dst[i] = src[i]
}
}
}
s.out = out
}
// Add final literals
s.out = append(s.out, s.literals...)
copy(out[t:], s.literals)
if debugDecoder {
t += len(s.literals)
if t != len(out) {
panic(fmt.Errorf("length mismatch, want %d, got %d, ss: %d", len(out), t, s.seqSize))
}
}
s.out = out

return nil
}
Expand All @@ -331,6 +346,7 @@ func (s *sequenceDecs) decodeSync(history *history) error {
llTable, mlTable, ofTable := s.litLengths.fse.dt[:maxTablesize], s.matchLengths.fse.dt[:maxTablesize], s.offsets.fse.dt[:maxTablesize]
llState, mlState, ofState := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state
hist := history.b[history.ignoreBuffer:]
out := s.out

for i := seqs - 1; i >= 0; i-- {
if br.overread() {
Expand Down Expand Up @@ -408,51 +424,49 @@ func (s *sequenceDecs) decodeSync(history *history) error {
if ll > len(s.literals) {
return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, len(s.literals))
}
size := ll + ml + len(s.out)
size := ll + ml + len(out)
if size-startSize > maxBlockSize {
return fmt.Errorf("output (%d) bigger than max block size", size)
}
if size > cap(s.out) {
if size > cap(out) {
// Not enough size, which can happen under high volume block streaming conditions
// but could be if destination slice is too small for sync operations.
// over-allocating here can create a large amount of GC pressure so we try to keep
// it as contained as possible
used := len(s.out) - startSize
used := len(out) - startSize
addBytes := 256 + ll + ml + used>>2
// Clamp to max block size.
if used+addBytes > maxBlockSize {
addBytes = maxBlockSize - used
}
s.out = append(s.out, make([]byte, addBytes)...)
s.out = s.out[:len(s.out)-addBytes]
out = append(out, make([]byte, addBytes)...)
out = out[:len(out)-addBytes]
}
if ml > maxMatchLen {
return fmt.Errorf("match len (%d) bigger than max allowed length", ml)
}

// Add literals
s.out = append(s.out, s.literals[:ll]...)
out = append(out, s.literals[:ll]...)
s.literals = s.literals[ll:]
out := s.out

if mo == 0 && ml > 0 {
return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml)
}

if mo > len(s.out)+len(hist) || mo > s.windowSize {
if mo > len(out)+len(hist) || mo > s.windowSize {
if len(s.dict) == 0 {
return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(s.out)+len(hist))
return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(out)+len(hist))
}

// we may be in dictionary.
dictO := len(s.dict) - (mo - (len(s.out) + len(hist)))
dictO := len(s.dict) - (mo - (len(out) + len(hist)))
if dictO < 0 || dictO >= len(s.dict) {
return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(s.out)+len(hist))
return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(out)+len(hist))
}
end := dictO + ml
if end > len(s.dict) {
out = append(out, s.dict[dictO:]...)
mo -= len(s.dict) - dictO
ml -= len(s.dict) - dictO
} else {
out = append(out, s.dict[dictO:end]...)
Expand All @@ -463,14 +477,13 @@ func (s *sequenceDecs) decodeSync(history *history) error {

// Copy from history.
// TODO: Blocks without history could be made to ignore this completely.
if v := mo - len(s.out); v > 0 {
if v := mo - len(out); v > 0 {
// v is the start position in history from end.
start := len(hist) - v
if ml > v {
// Some goes into current block.
// Copy remainder of history
out = append(out, hist[start:]...)
mo -= v
ml -= v
} else {
out = append(out, hist[start:start+ml]...)
Expand All @@ -479,10 +492,10 @@ func (s *sequenceDecs) decodeSync(history *history) error {
}
// We must be in current buffer now
if ml > 0 {
start := len(s.out) - mo
if ml <= len(s.out)-start {
start := len(out) - mo
if ml <= len(out)-start {
// No overlap
out = append(out, s.out[start:start+ml]...)
out = append(out, out[start:start+ml]...)
} else {
// Overlapping copy
// Extend destination slice and copy one byte at a time.
Expand All @@ -496,7 +509,6 @@ func (s *sequenceDecs) decodeSync(history *history) error {
}
}
}
s.out = out
if i == 0 {
// This is the last sequence, so we shouldn't update state.
break
Expand Down Expand Up @@ -524,7 +536,7 @@ func (s *sequenceDecs) decodeSync(history *history) error {
}

// Add final literals
s.out = append(s.out, s.literals...)
s.out = append(out, s.literals...)
return br.close()
}

Expand Down

0 comments on commit 3cfb088

Please sign in to comment.