From cd376a23e6e2f989b45064354bb4316d47e9669c Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Tue, 1 Mar 2022 16:42:55 +0100 Subject: [PATCH 1/3] zstd: Improve decompression speed Copy appears better than append. In async streaming where we know the size, use copy instead of append. Simplify some checks. ``` STREAM BEFORE: (best of 5) BenchmarkDecoderEnwik9-32 4 275810800 ns/op 362.57 MB/s 53570 B/op 44 allocs/op BenchmarkDecoderSilesia-32 3 389839700 ns/op 543.68 MB/s 2148208 B/op 44 allocs/op BenchmarkDecoderJSON-32 1 6487052600 ns/op 967.15 MB/s 101128 B/op 68 allocs/op STREAM AFTER: (best of 5) BenchmarkDecoderEnwik9-32 4 252812900 ns/op 395.55 MB/s 53970 B/op 45 allocs/op BenchmarkDecoderSilesia-32 3 360591267 ns/op 587.78 MB/s 2148309 B/op 45 allocs/op BenchmarkDecoderJSON-32 1 5632760500 ns/op 1113.83 MB/s 63008 B/op 55 allocs/op benchmark old ns/op new ns/op delta BenchmarkDecoder_DecoderSmall/kppkn.gtb.zst-32 4480251 4484930 +0.10% BenchmarkDecoder_DecoderSmall/geo.protodata.zst-32 1024974 1021181 -0.37% BenchmarkDecoder_DecoderSmall/plrabn12.txt.zst-32 14753194 14762007 +0.06% BenchmarkDecoder_DecoderSmall/lcet10.txt.zst-32 10879473 10653569 -2.08% BenchmarkDecoder_DecoderSmall/asyoulik.txt.zst-32 3570259 3556231 -0.39% BenchmarkDecoder_DecoderSmall/alice29.txt.zst-32 4703995 4586619 -2.50% BenchmarkDecoder_DecoderSmall/html_x_4.zst-32 1555951 1430158 -8.08% BenchmarkDecoder_DecoderSmall/paper-100k.pdf.zst-32 201293 198664 -1.31% BenchmarkDecoder_DecoderSmall/fireworks.jpeg.zst-32 77448 77995 +0.71% BenchmarkDecoder_DecoderSmall/urls.10K.zst-32 12220712 12006213 -1.76% BenchmarkDecoder_DecoderSmall/html.zst-32 1105131 1099679 -0.49% BenchmarkDecoder_DecoderSmall/comp-data.bin.zst-32 83636 82497 -1.36% BenchmarkDecoder_DecodeAll/kppkn.gtb.zst-32 545855 536756 -1.67% BenchmarkDecoder_DecodeAll/geo.protodata.zst-32 126847 124858 -1.57% BenchmarkDecoder_DecodeAll/plrabn12.txt.zst-32 1737341 1705736 -1.82% BenchmarkDecoder_DecodeAll/lcet10.txt.zst-32 1287042 1268665 -1.43% BenchmarkDecoder_DecodeAll/asyoulik.txt.zst-32 444045 437962 -1.37% BenchmarkDecoder_DecodeAll/alice29.txt.zst-32 579794 564964 -2.56% BenchmarkDecoder_DecodeAll/html_x_4.zst-32 242215 239961 -0.93% BenchmarkDecoder_DecodeAll/paper-100k.pdf.zst-32 23355 22708 -2.77% BenchmarkDecoder_DecodeAll/fireworks.jpeg.zst-32 9093 9109 +0.18% BenchmarkDecoder_DecodeAll/urls.10K.zst-32 1445517 1426542 -1.31% BenchmarkDecoder_DecodeAll/html.zst-32 136453 135449 -0.74% BenchmarkDecoder_DecodeAll/comp-data.bin.zst-32 10382 10194 -1.81% BenchmarkDecoder_DecodeAllFiles/.tracker-unpacked.bin/fastest-32 1503663 1420972 -5.50% BenchmarkDecoder_DecodeAllFiles/.tracker-unpacked.bin/default-32 1394726 1336862 -4.15% BenchmarkDecoder_DecodeAllFiles/.tracker-unpacked.bin/better-32 1238850 1206617 -2.60% BenchmarkDecoder_DecodeAllFiles/.tracker-unpacked.bin/best-32 922146 894515 -3.00% BenchmarkDecoder_DecodeAllFiles/.tracker.bin/fastest-32 4511 4523 +0.27% BenchmarkDecoder_DecodeAllFiles/.tracker.bin/default-32 185382 183977 -0.76% BenchmarkDecoder_DecodeAllFiles/.tracker.bin/better-32 173345 174367 +0.59% BenchmarkDecoder_DecodeAllFiles/.tracker.bin/best-32 200077 200729 +0.33% BenchmarkDecoder_DecodeAllFiles/Mark.Twain-Tom.Sawyer.txt/fastest-32 1340305 1320598 -1.47% BenchmarkDecoder_DecodeAllFiles/Mark.Twain-Tom.Sawyer.txt/default-32 1356206 1348648 -0.56% BenchmarkDecoder_DecodeAllFiles/Mark.Twain-Tom.Sawyer.txt/better-32 1279765 1262501 -1.35% BenchmarkDecoder_DecodeAllFiles/Mark.Twain-Tom.Sawyer.txt/best-32 1326312 1290128 -2.73% BenchmarkDecoder_DecodeAllFiles/e.txt/fastest-32 7429 7409 -0.27% BenchmarkDecoder_DecodeAllFiles/e.txt/default-32 337217 332148 -1.50% BenchmarkDecoder_DecodeAllFiles/e.txt/better-32 264014 259935 -1.54% BenchmarkDecoder_DecodeAllFiles/e.txt/best-32 190466 190781 +0.17% BenchmarkDecoder_DecodeAllFiles/fse-artifact3.bin/fastest-32 3195 3159 -1.13% BenchmarkDecoder_DecodeAllFiles/fse-artifact3.bin/default-32 3096 2918 -5.75% BenchmarkDecoder_DecodeAllFiles/fse-artifact3.bin/better-32 3564 3528 -1.01% BenchmarkDecoder_DecodeAllFiles/fse-artifact3.bin/best-32 9356 9213 -1.53% BenchmarkDecoder_DecodeAllFiles/gettysburg.txt/fastest-32 5209 5164 -0.86% BenchmarkDecoder_DecodeAllFiles/gettysburg.txt/default-32 6511 6430 -1.24% BenchmarkDecoder_DecodeAllFiles/gettysburg.txt/better-32 6484 6382 -1.57% BenchmarkDecoder_DecodeAllFiles/gettysburg.txt/best-32 6963 6917 -0.66% BenchmarkDecoder_DecodeAllFiles/html.txt/fastest-32 104999 104529 -0.45% BenchmarkDecoder_DecodeAllFiles/html.txt/default-32 108301 109125 +0.76% BenchmarkDecoder_DecodeAllFiles/html.txt/better-32 101897 103069 +1.15% BenchmarkDecoder_DecodeAllFiles/html.txt/best-32 111684 108225 -3.10% BenchmarkDecoder_DecodeAllFiles/pi.txt/fastest-32 7440 7454 +0.19% BenchmarkDecoder_DecodeAllFiles/pi.txt/default-32 339353 330973 -2.47% BenchmarkDecoder_DecodeAllFiles/pi.txt/better-32 262241 259706 -0.97% BenchmarkDecoder_DecodeAllFiles/pi.txt/best-32 192520 190490 -1.05% BenchmarkDecoder_DecodeAllFiles/pngdata.bin/fastest-32 27357 26713 -2.35% BenchmarkDecoder_DecodeAllFiles/pngdata.bin/default-32 30316 30122 -0.64% BenchmarkDecoder_DecodeAllFiles/pngdata.bin/better-32 25379 24816 -2.22% BenchmarkDecoder_DecodeAllFiles/pngdata.bin/best-32 30487 29783 -2.31% BenchmarkDecoder_DecodeAllFiles/sharnd.out/fastest-32 7491 7386 -1.40% BenchmarkDecoder_DecodeAllFiles/sharnd.out/default-32 7471 7469 -0.03% BenchmarkDecoder_DecodeAllFiles/sharnd.out/better-32 7480 7399 -1.08% BenchmarkDecoder_DecodeAllFiles/sharnd.out/best-32 7442 7445 +0.04% BenchmarkDecoder_DecodeAllFilesP/.tracker-unpacked.bin/fastest-32 93914 96888 +3.17% BenchmarkDecoder_DecodeAllFilesP/.tracker-unpacked.bin/default-32 70662 66887 -5.34% BenchmarkDecoder_DecodeAllFilesP/.tracker-unpacked.bin/better-32 64330 60733 -5.59% BenchmarkDecoder_DecodeAllFilesP/.tracker-unpacked.bin/best-32 46892 45656 -2.64% BenchmarkDecoder_DecodeAllFilesP/.tracker.bin/fastest-32 1706 1702 -0.23% BenchmarkDecoder_DecodeAllFilesP/.tracker.bin/default-32 10369 10491 +1.18% BenchmarkDecoder_DecodeAllFilesP/.tracker.bin/better-32 9685 9589 -0.99% BenchmarkDecoder_DecodeAllFilesP/.tracker.bin/best-32 11170 10915 -2.28% BenchmarkDecoder_DecodeAllFilesP/Mark.Twain-Tom.Sawyer.txt/fastest-32 91486 82267 -10.08% BenchmarkDecoder_DecodeAllFilesP/Mark.Twain-Tom.Sawyer.txt/default-32 74032 70317 -5.02% BenchmarkDecoder_DecodeAllFilesP/Mark.Twain-Tom.Sawyer.txt/better-32 68588 66062 -3.68% BenchmarkDecoder_DecodeAllFilesP/Mark.Twain-Tom.Sawyer.txt/best-32 71520 68268 -4.55% BenchmarkDecoder_DecodeAllFilesP/e.txt/fastest-32 1935 1918 -0.88% BenchmarkDecoder_DecodeAllFilesP/e.txt/default-32 20006 19697 -1.54% BenchmarkDecoder_DecodeAllFilesP/e.txt/better-32 14810 14510 -2.03% BenchmarkDecoder_DecodeAllFilesP/e.txt/best-32 10840 10862 +0.20% BenchmarkDecoder_DecodeAllFilesP/fse-artifact3.bin/fastest-32 1353 1315 -2.81% BenchmarkDecoder_DecodeAllFilesP/fse-artifact3.bin/default-32 1356 1339 -1.25% BenchmarkDecoder_DecodeAllFilesP/fse-artifact3.bin/better-32 1283 1280 -0.23% BenchmarkDecoder_DecodeAllFilesP/fse-artifact3.bin/best-32 1225 1216 -0.73% BenchmarkDecoder_DecodeAllFilesP/gettysburg.txt/fastest-32 1295 1336 +3.17% BenchmarkDecoder_DecodeAllFilesP/gettysburg.txt/default-32 1298 1283 -1.16% BenchmarkDecoder_DecodeAllFilesP/gettysburg.txt/better-32 1236 1255 +1.54% BenchmarkDecoder_DecodeAllFilesP/gettysburg.txt/best-32 1270 1207 -4.96% BenchmarkDecoder_DecodeAllFilesP/html.txt/fastest-32 5840 5875 +0.60% BenchmarkDecoder_DecodeAllFilesP/html.txt/default-32 6230 6067 -2.62% BenchmarkDecoder_DecodeAllFilesP/html.txt/better-32 5711 5619 -1.61% BenchmarkDecoder_DecodeAllFilesP/html.txt/best-32 6459 6405 -0.84% BenchmarkDecoder_DecodeAllFilesP/pi.txt/fastest-32 1922 1943 +1.09% BenchmarkDecoder_DecodeAllFilesP/pi.txt/default-32 20029 19041 -4.93% BenchmarkDecoder_DecodeAllFilesP/pi.txt/better-32 14729 14440 -1.96% BenchmarkDecoder_DecodeAllFilesP/pi.txt/best-32 10857 10861 +0.04% BenchmarkDecoder_DecodeAllFilesP/pngdata.bin/fastest-32 1566 1481 -5.43% BenchmarkDecoder_DecodeAllFilesP/pngdata.bin/default-32 1609 1616 +0.44% BenchmarkDecoder_DecodeAllFilesP/pngdata.bin/better-32 1351 1334 -1.26% BenchmarkDecoder_DecodeAllFilesP/pngdata.bin/best-32 1531 1525 -0.39% BenchmarkDecoder_DecodeAllFilesP/sharnd.out/fastest-32 1932 1921 -0.57% BenchmarkDecoder_DecodeAllFilesP/sharnd.out/default-32 1948 1947 -0.05% BenchmarkDecoder_DecodeAllFilesP/sharnd.out/better-32 1924 1903 -1.09% BenchmarkDecoder_DecodeAllFilesP/sharnd.out/best-32 1939 1892 -2.42% BenchmarkDecoder_DecodeAllParallel/kppkn.gtb.zst-32 34626 30036 -13.26% BenchmarkDecoder_DecodeAllParallel/geo.protodata.zst-32 7397 6822 -7.77% BenchmarkDecoder_DecodeAllParallel/plrabn12.txt.zst-32 113773 96432 -15.24% BenchmarkDecoder_DecodeAllParallel/lcet10.txt.zst-32 84246 72464 -13.99% BenchmarkDecoder_DecodeAllParallel/asyoulik.txt.zst-32 28424 24778 -12.83% BenchmarkDecoder_DecodeAllParallel/alice29.txt.zst-32 38002 32737 -13.85% BenchmarkDecoder_DecodeAllParallel/html_x_4.zst-32 13434 13014 -3.13% BenchmarkDecoder_DecodeAllParallel/paper-100k.pdf.zst-32 1463 1349 -7.79% BenchmarkDecoder_DecodeAllParallel/fireworks.jpeg.zst-32 2014 2022 +0.40% BenchmarkDecoder_DecodeAllParallel/urls.10K.zst-32 86544 78091 -9.77% BenchmarkDecoder_DecodeAllParallel/html.zst-32 8358 7631 -8.70% BenchmarkDecoder_DecodeAllParallel/comp-data.bin.zst-32 787 794 +0.81% BenchmarkDecoderSilesia-32 426845533 382465000 -10.40% BenchmarkDecoderEnwik9-32 297446375 268355150 -9.78% benchmark old MB/s new MB/s speedup BenchmarkDecoder_DecoderSmall/kppkn.gtb.zst-32 329.12 328.78 1.00x BenchmarkDecoder_DecoderSmall/geo.protodata.zst-32 925.59 929.03 1.00x BenchmarkDecoder_DecoderSmall/plrabn12.txt.zst-32 261.29 261.14 1.00x BenchmarkDecoder_DecoderSmall/lcet10.txt.zst-32 313.80 320.46 1.02x BenchmarkDecoder_DecoderSmall/asyoulik.txt.zst-32 280.49 281.60 1.00x BenchmarkDecoder_DecoderSmall/alice29.txt.zst-32 258.66 265.27 1.03x BenchmarkDecoder_DecoderSmall/html_x_4.zst-32 2105.98 2291.22 1.09x BenchmarkDecoder_DecoderSmall/paper-100k.pdf.zst-32 4069.70 4123.55 1.01x BenchmarkDecoder_DecoderSmall/fireworks.jpeg.zst-32 12714.88 12625.68 0.99x BenchmarkDecoder_DecoderSmall/urls.10K.zst-32 459.60 467.82 1.02x BenchmarkDecoder_DecoderSmall/html.zst-32 741.27 744.94 1.00x BenchmarkDecoder_DecoderSmall/comp-data.bin.zst-32 389.88 395.26 1.01x BenchmarkDecoder_DecodeAll/kppkn.gtb.zst-32 337.67 343.40 1.02x BenchmarkDecoder_DecodeAll/geo.protodata.zst-32 934.89 949.78 1.02x BenchmarkDecoder_DecodeAll/plrabn12.txt.zst-32 277.36 282.49 1.02x BenchmarkDecoder_DecodeAll/lcet10.txt.zst-32 331.58 336.38 1.01x BenchmarkDecoder_DecodeAll/asyoulik.txt.zst-32 281.91 285.82 1.01x BenchmarkDecoder_DecodeAll/alice29.txt.zst-32 262.32 269.20 1.03x BenchmarkDecoder_DecodeAll/html_x_4.zst-32 1691.06 1706.95 1.01x BenchmarkDecoder_DecodeAll/paper-100k.pdf.zst-32 4384.52 4509.41 1.03x BenchmarkDecoder_DecodeAll/fireworks.jpeg.zst-32 13536.42 13513.20 1.00x BenchmarkDecoder_DecodeAll/urls.10K.zst-32 485.70 492.16 1.01x BenchmarkDecoder_DecodeAll/html.zst-32 750.44 756.00 1.01x BenchmarkDecoder_DecodeAll/comp-data.bin.zst-32 392.61 399.86 1.02x BenchmarkDecoder_DecodeAllFiles/.tracker-unpacked.bin/fastest-32 319.22 337.80 1.06x BenchmarkDecoder_DecodeAllFiles/.tracker-unpacked.bin/default-32 344.15 359.05 1.04x BenchmarkDecoder_DecodeAllFiles/.tracker-unpacked.bin/better-32 387.46 397.81 1.03x BenchmarkDecoder_DecodeAllFiles/.tracker-unpacked.bin/best-32 520.53 536.60 1.03x BenchmarkDecoder_DecodeAllFiles/.tracker.bin/fastest-32 13301.00 13264.44 1.00x BenchmarkDecoder_DecodeAllFiles/.tracker.bin/default-32 323.66 326.13 1.01x BenchmarkDecoder_DecodeAllFiles/.tracker.bin/better-32 346.13 344.10 0.99x BenchmarkDecoder_DecodeAllFiles/.tracker.bin/best-32 299.88 298.91 1.00x BenchmarkDecoder_DecodeAllFiles/Mark.Twain-Tom.Sawyer.txt/fastest-32 289.46 293.78 1.01x BenchmarkDecoder_DecodeAllFiles/Mark.Twain-Tom.Sawyer.txt/default-32 286.07 287.67 1.01x BenchmarkDecoder_DecodeAllFiles/Mark.Twain-Tom.Sawyer.txt/better-32 303.15 307.30 1.01x BenchmarkDecoder_DecodeAllFiles/Mark.Twain-Tom.Sawyer.txt/best-32 292.51 300.72 1.03x BenchmarkDecoder_DecodeAllFiles/e.txt/fastest-32 13462.02 13496.77 1.00x BenchmarkDecoder_DecodeAllFiles/e.txt/default-32 296.55 301.08 1.02x BenchmarkDecoder_DecodeAllFiles/e.txt/better-32 378.78 384.72 1.02x BenchmarkDecoder_DecodeAllFiles/e.txt/best-32 525.05 524.18 1.00x BenchmarkDecoder_DecodeAllFiles/fse-artifact3.bin/fastest-32 1288.21 1302.89 1.01x BenchmarkDecoder_DecodeAllFiles/fse-artifact3.bin/default-32 1329.47 1410.42 1.06x BenchmarkDecoder_DecodeAllFiles/fse-artifact3.bin/better-32 1154.95 1166.67 1.01x BenchmarkDecoder_DecodeAllFiles/fse-artifact3.bin/best-32 439.95 446.75 1.02x BenchmarkDecoder_DecodeAllFiles/gettysburg.txt/fastest-32 297.17 299.77 1.01x BenchmarkDecoder_DecodeAllFiles/gettysburg.txt/default-32 237.77 240.75 1.01x BenchmarkDecoder_DecodeAllFiles/gettysburg.txt/better-32 238.73 242.55 1.02x BenchmarkDecoder_DecodeAllFiles/gettysburg.txt/best-32 222.31 223.81 1.01x BenchmarkDecoder_DecodeAllFiles/html.txt/fastest-32 423.59 425.50 1.00x BenchmarkDecoder_DecodeAllFiles/html.txt/default-32 410.68 407.58 0.99x BenchmarkDecoder_DecodeAllFiles/html.txt/better-32 436.49 431.53 0.99x BenchmarkDecoder_DecodeAllFiles/html.txt/best-32 398.24 410.97 1.03x BenchmarkDecoder_DecodeAllFiles/pi.txt/fastest-32 13440.83 13415.45 1.00x BenchmarkDecoder_DecodeAllFiles/pi.txt/default-32 294.69 302.15 1.03x BenchmarkDecoder_DecodeAllFiles/pi.txt/better-32 381.34 385.06 1.01x BenchmarkDecoder_DecodeAllFiles/pi.txt/best-32 519.44 524.98 1.01x BenchmarkDecoder_DecodeAllFiles/pngdata.bin/fastest-32 1871.53 1916.67 1.02x BenchmarkDecoder_DecodeAllFiles/pngdata.bin/default-32 1688.89 1699.76 1.01x BenchmarkDecoder_DecodeAllFiles/pngdata.bin/better-32 2017.44 2063.17 1.02x BenchmarkDecoder_DecodeAllFiles/pngdata.bin/best-32 1679.40 1719.09 1.02x BenchmarkDecoder_DecodeAllFiles/sharnd.out/fastest-32 13350.37 13538.80 1.01x BenchmarkDecoder_DecodeAllFiles/sharnd.out/default-32 13385.38 13389.11 1.00x BenchmarkDecoder_DecodeAllFiles/sharnd.out/better-32 13369.65 13516.52 1.01x BenchmarkDecoder_DecodeAllFiles/sharnd.out/best-32 13437.24 13431.84 1.00x BenchmarkDecoder_DecodeAllFilesP/.tracker-unpacked.bin/fastest-32 5111.06 4954.15 0.97x BenchmarkDecoder_DecodeAllFilesP/.tracker-unpacked.bin/default-32 6792.94 7176.26 1.06x BenchmarkDecoder_DecodeAllFilesP/.tracker-unpacked.bin/better-32 7461.50 7903.48 1.06x BenchmarkDecoder_DecodeAllFilesP/.tracker-unpacked.bin/best-32 10236.18 10513.47 1.03x BenchmarkDecoder_DecodeAllFilesP/.tracker.bin/fastest-32 35163.03 35254.05 1.00x BenchmarkDecoder_DecodeAllFilesP/.tracker.bin/default-32 5786.32 5719.16 0.99x BenchmarkDecoder_DecodeAllFilesP/.tracker.bin/better-32 6194.89 6257.03 1.01x BenchmarkDecoder_DecodeAllFilesP/.tracker.bin/best-32 5371.72 5497.26 1.02x BenchmarkDecoder_DecodeAllFilesP/Mark.Twain-Tom.Sawyer.txt/fastest-32 4240.71 4715.91 1.11x BenchmarkDecoder_DecodeAllFilesP/Mark.Twain-Tom.Sawyer.txt/default-32 5240.50 5517.37 1.05x BenchmarkDecoder_DecodeAllFilesP/Mark.Twain-Tom.Sawyer.txt/better-32 5656.41 5872.77 1.04x BenchmarkDecoder_DecodeAllFilesP/Mark.Twain-Tom.Sawyer.txt/best-32 5424.53 5682.96 1.05x BenchmarkDecoder_DecodeAllFilesP/e.txt/fastest-32 51685.02 52145.98 1.01x BenchmarkDecoder_DecodeAllFilesP/e.txt/default-32 4998.65 5077.00 1.02x BenchmarkDecoder_DecodeAllFilesP/e.txt/better-32 6752.55 6892.07 1.02x BenchmarkDecoder_DecodeAllFilesP/e.txt/best-32 9225.05 9207.07 1.00x BenchmarkDecoder_DecodeAllFilesP/fse-artifact3.bin/fastest-32 3043.07 3129.02 1.03x BenchmarkDecoder_DecodeAllFilesP/fse-artifact3.bin/default-32 3035.30 3074.23 1.01x BenchmarkDecoder_DecodeAllFilesP/fse-artifact3.bin/better-32 3209.01 3214.71 1.00x BenchmarkDecoder_DecodeAllFilesP/fse-artifact3.bin/best-32 3360.11 3383.82 1.01x BenchmarkDecoder_DecodeAllFilesP/gettysburg.txt/fastest-32 1195.22 1158.31 0.97x BenchmarkDecoder_DecodeAllFilesP/gettysburg.txt/default-32 1192.30 1206.40 1.01x BenchmarkDecoder_DecodeAllFilesP/gettysburg.txt/better-32 1252.38 1233.71 0.99x BenchmarkDecoder_DecodeAllFilesP/gettysburg.txt/best-32 1219.26 1282.15 1.05x BenchmarkDecoder_DecodeAllFilesP/html.txt/fastest-32 7615.58 7570.06 0.99x BenchmarkDecoder_DecodeAllFilesP/html.txt/default-32 7139.51 7330.77 1.03x BenchmarkDecoder_DecodeAllFilesP/html.txt/better-32 7787.61 7914.88 1.02x BenchmarkDecoder_DecodeAllFilesP/html.txt/best-32 6886.49 6944.28 1.01x BenchmarkDecoder_DecodeAllFilesP/pi.txt/fastest-32 52022.79 51480.17 0.99x BenchmarkDecoder_DecodeAllFilesP/pi.txt/default-32 4992.98 5251.99 1.05x BenchmarkDecoder_DecodeAllFilesP/pi.txt/better-32 6789.42 6925.45 1.02x BenchmarkDecoder_DecodeAllFilesP/pi.txt/best-32 9210.74 9207.14 1.00x BenchmarkDecoder_DecodeAllFilesP/pngdata.bin/fastest-32 32689.14 34582.57 1.06x BenchmarkDecoder_DecodeAllFilesP/pngdata.bin/default-32 31830.69 31687.49 1.00x BenchmarkDecoder_DecodeAllFilesP/pngdata.bin/better-32 37892.96 38369.48 1.01x BenchmarkDecoder_DecodeAllFilesP/pngdata.bin/best-32 33442.97 33568.58 1.00x BenchmarkDecoder_DecodeAllFilesP/sharnd.out/fastest-32 51761.39 52071.32 1.01x BenchmarkDecoder_DecodeAllFilesP/sharnd.out/default-32 51331.54 51353.05 1.00x BenchmarkDecoder_DecodeAllFilesP/sharnd.out/better-32 51966.35 52553.14 1.01x BenchmarkDecoder_DecodeAllFilesP/sharnd.out/best-32 51568.12 52848.90 1.02x BenchmarkDecoder_DecodeAllParallel/kppkn.gtb.zst-32 5323.13 6136.61 1.15x BenchmarkDecoder_DecodeAllParallel/geo.protodata.zst-32 16031.69 17383.02 1.08x BenchmarkDecoder_DecodeAllParallel/plrabn12.txt.zst-32 4235.29 4996.90 1.18x BenchmarkDecoder_DecodeAllParallel/lcet10.txt.zst-32 5065.58 5889.20 1.16x BenchmarkDecoder_DecodeAllParallel/asyoulik.txt.zst-32 4403.99 5051.92 1.15x BenchmarkDecoder_DecodeAllParallel/alice29.txt.zst-32 4002.09 4645.74 1.16x BenchmarkDecoder_DecodeAllParallel/html_x_4.zst-32 30489.31 31474.10 1.03x BenchmarkDecoder_DecodeAllParallel/paper-100k.pdf.zst-32 69969.58 75900.46 1.08x BenchmarkDecoder_DecodeAllParallel/fireworks.jpeg.zst-32 61103.57 60874.06 1.00x BenchmarkDecoder_DecodeAllParallel/urls.10K.zst-32 8112.48 8990.66 1.11x BenchmarkDecoder_DecodeAllParallel/html.zst-32 12252.21 13419.37 1.10x BenchmarkDecoder_DecodeAllParallel/comp-data.bin.zst-32 5177.63 5135.95 0.99x BenchmarkDecoderSilesia-32 496.54 554.16 1.12x BenchmarkDecoderEnwik9-32 336.20 372.64 1.11x ``` --- zstd/blockdec.go | 1 + zstd/decoder_test.go | 2 +- zstd/seqdec.go | 101 +++++++++++++++++++++++-------------------- 3 files changed, 57 insertions(+), 47 deletions(-) diff --git a/zstd/blockdec.go b/zstd/blockdec.go index e5a38d1408..607b62ee37 100644 --- a/zstd/blockdec.go +++ b/zstd/blockdec.go @@ -632,6 +632,7 @@ func (b *blockDec) decodeSequences(hist *history) error { } b.sequence = b.sequence[:hist.decoders.nSeqs] if hist.decoders.nSeqs == 0 { + hist.decoders.seqSize = len(hist.decoders.literals) return nil } hist.decoders.prevOffset = hist.recentOffsets diff --git a/zstd/decoder_test.go b/zstd/decoder_test.go index 5eeebada6c..3025c4fc78 100644 --- a/zstd/decoder_test.go +++ b/zstd/decoder_test.go @@ -1052,7 +1052,7 @@ func testDecoderFile(t *testing.T, fn string, newDec func() (*Decoder, error)) { } wg.Wait() if gotError != nil { - t.Error(err) + t.Error(gotError, err) if err != ErrCRCMismatch { return } diff --git a/zstd/seqdec.go b/zstd/seqdec.go index e367281465..60b043191c 100644 --- a/zstd/seqdec.go +++ b/zstd/seqdec.go @@ -170,7 +170,9 @@ func (s *sequenceDecs) decode(seqs []seqVals) error { br.fillFast() } else { if br.overread() { - printf("reading sequence %d, exceeded available data\n", i) + if debugDecoder { + printf("reading sequence %d, exceeded available data\n", i) + } return io.ErrUnexpectedEOF } ll, mo, ml = s.next(br, llState, mlState, ofState) @@ -248,76 +250,86 @@ func (s *sequenceDecs) execute(seqs []seqVals, hist []byte) error { } if debugDecoder { - printf("Execute %d seqs with hist %d, dict %d, literals: %d bytes\n", len(seqs), len(hist), len(s.dict), len(s.literals)) + printf("Execute %d seqs with hist %d, dict %d, literals: %d into %d bytes\n", len(seqs), len(hist), len(s.dict), len(s.literals), s.seqSize) } + var t = len(s.out) + s.out = s.out[:t+s.seqSize] + for _, seq := range seqs { // Add literals - s.out = append(s.out, s.literals[:seq.ll]...) + copy(s.out[t:], s.literals[:seq.ll]) + t += seq.ll s.literals = s.literals[seq.ll:] - out := s.out - // Copy form dictionary... - if seq.mo > len(s.out)+len(hist) || seq.mo > s.windowSize { + // Copy from dictionary... + if seq.mo > t+len(hist) || seq.mo > s.windowSize { if len(s.dict) == 0 { - return fmt.Errorf("match offset (%d) bigger than current history (%d)", seq.mo, len(s.out)+len(hist)) + return fmt.Errorf("match offset (%d) bigger than current history (%d)", seq.mo, t+len(hist)) } // we may be in dictionary. - dictO := len(s.dict) - (seq.mo - (len(s.out) + len(hist))) + dictO := len(s.dict) - (seq.mo - (t + len(hist))) if dictO < 0 || dictO >= len(s.dict) { - return fmt.Errorf("match offset (%d) bigger than current history+dict (%d)", seq.mo, len(s.out)+len(hist)+len(s.dict)) + return fmt.Errorf("match offset (%d) bigger than current history+dict (%d)", seq.mo, t+len(hist)+len(s.dict)) } end := dictO + seq.ml if end > len(s.dict) { - out = append(out, s.dict[dictO:]...) - seq.mo -= len(s.dict) - dictO - seq.ml -= len(s.dict) - dictO + n := len(s.dict) - dictO + copy(s.out[t:], s.dict[dictO:]) + t += n + seq.ml -= n } else { - s.out = append(out, s.dict[dictO:end]...) + copy(s.out[t:], s.dict[dictO:end]) + t += end - dictO continue } } // Copy from history. - if v := seq.mo - len(s.out); v > 0 { + if v := seq.mo - t; v > 0 { // v is the start position in history from end. start := len(hist) - v if seq.ml > v { // Some goes into current block. // Copy remainder of history - out = append(out, hist[start:]...) - seq.mo -= v + copy(s.out[t:], hist[start:]) + t += v seq.ml -= v } else { - s.out = append(out, hist[start:start+seq.ml]...) + copy(s.out[t:], hist[start:start+seq.ml]) + t += seq.ml continue } } // We must be in current buffer now if seq.ml > 0 { - start := len(s.out) - seq.mo - if seq.ml <= len(s.out)-start { + start := t - seq.mo + if seq.ml <= t-start { // No overlap - s.out = append(out, s.out[start:start+seq.ml]...) + copy(s.out[t:], s.out[start:start+seq.ml]) + t += seq.ml continue } else { // Overlapping copy // Extend destination slice and copy one byte at the time. - out = out[:len(out)+seq.ml] - src := out[start : start+seq.ml] + src := s.out[start : start+seq.ml] // Destination is the space we just added. - dst := out[len(out)-seq.ml:] - dst = dst[:len(src)] for i := range src { - dst[i] = src[i] + s.out[t] = src[i] + t++ } } } - s.out = out } // Add final literals - s.out = append(s.out, s.literals...) + copy(s.out[t:], s.literals) + if debugDecoder { + t += len(s.literals) + if t != len(s.out) { + panic(fmt.Errorf("length mismatch, want %d, got %d, ss: %d", len(s.out), t, s.seqSize)) + } + } return nil } @@ -331,6 +343,7 @@ func (s *sequenceDecs) decodeSync(history *history) error { llTable, mlTable, ofTable := s.litLengths.fse.dt[:maxTablesize], s.matchLengths.fse.dt[:maxTablesize], s.offsets.fse.dt[:maxTablesize] llState, mlState, ofState := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state hist := history.b[history.ignoreBuffer:] + out := s.out for i := seqs - 1; i >= 0; i-- { if br.overread() { @@ -408,51 +421,49 @@ func (s *sequenceDecs) decodeSync(history *history) error { if ll > len(s.literals) { return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, len(s.literals)) } - size := ll + ml + len(s.out) + size := ll + ml + len(out) if size-startSize > maxBlockSize { return fmt.Errorf("output (%d) bigger than max block size", size) } - if size > cap(s.out) { + if size > cap(out) { // Not enough size, which can happen under high volume block streaming conditions // but could be if destination slice is too small for sync operations. // over-allocating here can create a large amount of GC pressure so we try to keep // it as contained as possible - used := len(s.out) - startSize + used := len(out) - startSize addBytes := 256 + ll + ml + used>>2 // Clamp to max block size. if used+addBytes > maxBlockSize { addBytes = maxBlockSize - used } - s.out = append(s.out, make([]byte, addBytes)...) - s.out = s.out[:len(s.out)-addBytes] + out = append(out, make([]byte, addBytes)...) + out = out[:len(out)-addBytes] } if ml > maxMatchLen { return fmt.Errorf("match len (%d) bigger than max allowed length", ml) } // Add literals - s.out = append(s.out, s.literals[:ll]...) + out = append(out, s.literals[:ll]...) s.literals = s.literals[ll:] - out := s.out if mo == 0 && ml > 0 { return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml) } - if mo > len(s.out)+len(hist) || mo > s.windowSize { + if mo > len(out)+len(hist) || mo > s.windowSize { if len(s.dict) == 0 { - return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(s.out)+len(hist)) + return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(out)+len(hist)) } // we may be in dictionary. - dictO := len(s.dict) - (mo - (len(s.out) + len(hist))) + dictO := len(s.dict) - (mo - (len(out) + len(hist))) if dictO < 0 || dictO >= len(s.dict) { - return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(s.out)+len(hist)) + return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(out)+len(hist)) } end := dictO + ml if end > len(s.dict) { out = append(out, s.dict[dictO:]...) - mo -= len(s.dict) - dictO ml -= len(s.dict) - dictO } else { out = append(out, s.dict[dictO:end]...) @@ -463,14 +474,13 @@ func (s *sequenceDecs) decodeSync(history *history) error { // Copy from history. // TODO: Blocks without history could be made to ignore this completely. - if v := mo - len(s.out); v > 0 { + if v := mo - len(out); v > 0 { // v is the start position in history from end. start := len(hist) - v if ml > v { // Some goes into current block. // Copy remainder of history out = append(out, hist[start:]...) - mo -= v ml -= v } else { out = append(out, hist[start:start+ml]...) @@ -479,10 +489,10 @@ func (s *sequenceDecs) decodeSync(history *history) error { } // We must be in current buffer now if ml > 0 { - start := len(s.out) - mo - if ml <= len(s.out)-start { + start := len(out) - mo + if ml <= len(out)-start { // No overlap - out = append(out, s.out[start:start+ml]...) + out = append(out, out[start:start+ml]...) } else { // Overlapping copy // Extend destination slice and copy one byte at the time. @@ -496,7 +506,6 @@ func (s *sequenceDecs) decodeSync(history *history) error { } } } - s.out = out if i == 0 { // This is the last sequence, so we shouldn't update state. break @@ -524,7 +533,7 @@ func (s *sequenceDecs) decodeSync(history *history) error { } // Add final literals - s.out = append(s.out, s.literals...) + s.out = append(out, s.literals...) return br.close() } From 7f9758cb1c4b6cdc6ae4b9520bebe94aa19d2376 Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Tue, 1 Mar 2022 18:10:09 +0100 Subject: [PATCH 2/3] Keep output local --- zstd/seqdec.go | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/zstd/seqdec.go b/zstd/seqdec.go index 60b043191c..066927e1e8 100644 --- a/zstd/seqdec.go +++ b/zstd/seqdec.go @@ -254,11 +254,11 @@ func (s *sequenceDecs) execute(seqs []seqVals, hist []byte) error { } var t = len(s.out) - s.out = s.out[:t+s.seqSize] + out := s.out[:t+s.seqSize] for _, seq := range seqs { // Add literals - copy(s.out[t:], s.literals[:seq.ll]) + copy(out[t:], s.literals[:seq.ll]) t += seq.ll s.literals = s.literals[seq.ll:] @@ -276,11 +276,11 @@ func (s *sequenceDecs) execute(seqs []seqVals, hist []byte) error { end := dictO + seq.ml if end > len(s.dict) { n := len(s.dict) - dictO - copy(s.out[t:], s.dict[dictO:]) + copy(out[t:], s.dict[dictO:]) t += n seq.ml -= n } else { - copy(s.out[t:], s.dict[dictO:end]) + copy(out[t:], s.dict[dictO:end]) t += end - dictO continue } @@ -293,11 +293,11 @@ func (s *sequenceDecs) execute(seqs []seqVals, hist []byte) error { if seq.ml > v { // Some goes into current block. // Copy remainder of history - copy(s.out[t:], hist[start:]) + copy(out[t:], hist[start:]) t += v seq.ml -= v } else { - copy(s.out[t:], hist[start:start+seq.ml]) + copy(out[t:], hist[start:start+seq.ml]) t += seq.ml continue } @@ -307,29 +307,30 @@ func (s *sequenceDecs) execute(seqs []seqVals, hist []byte) error { start := t - seq.mo if seq.ml <= t-start { // No overlap - copy(s.out[t:], s.out[start:start+seq.ml]) + copy(out[t:], out[start:start+seq.ml]) t += seq.ml continue } else { // Overlapping copy // Extend destination slice and copy one byte at the time. - src := s.out[start : start+seq.ml] + src := out[start : start+seq.ml] // Destination is the space we just added. for i := range src { - s.out[t] = src[i] + out[t] = src[i] t++ } } } } // Add final literals - copy(s.out[t:], s.literals) + copy(out[t:], s.literals) if debugDecoder { t += len(s.literals) - if t != len(s.out) { - panic(fmt.Errorf("length mismatch, want %d, got %d, ss: %d", len(s.out), t, s.seqSize)) + if t != len(out) { + panic(fmt.Errorf("length mismatch, want %d, got %d, ss: %d", len(out), t, s.seqSize)) } } + s.out = out return nil } From 52a9586915644a7aaf7b2ceccaf5de826607a504 Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Tue, 1 Mar 2022 18:21:41 +0100 Subject: [PATCH 3/3] Move bounds check out of loop. --- zstd/seqdec.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/zstd/seqdec.go b/zstd/seqdec.go index 066927e1e8..213736ad77 100644 --- a/zstd/seqdec.go +++ b/zstd/seqdec.go @@ -314,10 +314,12 @@ func (s *sequenceDecs) execute(seqs []seqVals, hist []byte) error { // Overlapping copy // Extend destination slice and copy one byte at the time. src := out[start : start+seq.ml] + dst := out[t:] + dst = dst[:len(src)] + t += len(src) // Destination is the space we just added. for i := range src { - out[t] = src[i] - t++ + dst[i] = src[i] } } }