diff --git a/s2/README.md b/s2/README.md index 700f997c28..1d80c42a53 100644 --- a/s2/README.md +++ b/s2/README.md @@ -325,35 +325,35 @@ The content compressed in this mode is fully compatible with the standard decode Snappy vs S2 **compression** speed on 16 core (32 thread) computer, using all threads and a single thread (1 CPU): -| File | S2 speed | S2 Throughput | S2 % smaller | S2 "better" | "better" throughput | "better" % smaller | -|-----------------------------------------------------------------------------------------------------|----------|---------------|--------------|-------------|---------------------|--------------------| -| [rawstudio-mint14.tar](https://files.klauspost.com/compress/rawstudio-mint14.7z) | 12.70x | 10556 MB/s | 7.35% | 4.15x | 3455 MB/s | 12.79% | -| (1 CPU) | 1.14x | 948 MB/s | - | 0.42x | 349 MB/s | - | -| [github-june-2days-2019.json](https://files.klauspost.com/compress/github-june-2days-2019.json.zst) | 17.13x | 14484 MB/s | 31.60% | 10.09x | 8533 MB/s | 37.71% | -| (1 CPU) | 1.33x | 1127 MB/s | - | 0.70x | 589 MB/s | - | -| [github-ranks-backup.bin](https://files.klauspost.com/compress/github-ranks-backup.bin.zst) | 15.14x | 12000 MB/s | -5.79% | 6.59x | 5223 MB/s | 5.80% | -| (1 CPU) | 1.11x | 877 MB/s | - | 0.47x | 370 MB/s | - | -| [consensus.db.10gb](https://files.klauspost.com/compress/consensus.db.10gb.zst) | 14.62x | 12116 MB/s | 15.90% | 5.35x | 4430 MB/s | 16.08% | -| (1 CPU) | 1.38x | 1146 MB/s | - | 0.38x | 312 MB/s | - | -| [adresser.json](https://files.klauspost.com/compress/adresser.json.zst) | 8.83x | 17579 MB/s | 43.86% | 6.54x | 13011 MB/s | 47.23% | -| (1 CPU) | 1.14x | 2259 MB/s | - | 0.74x | 1475 MB/s | - | -| [gob-stream](https://files.klauspost.com/compress/gob-stream.7z) | 16.72x | 14019 MB/s | 24.02% | 10.11x | 8477 MB/s | 30.48% | -| (1 CPU) | 1.24x | 1043 MB/s | - | 0.70x | 586 MB/s | - | -| [10gb.tar](http://mattmahoney.net/dc/10gb.html) | 13.33x | 9254 MB/s | 1.84% | 6.75x | 4686 MB/s | 6.72% | -| 
(1 CPU) | 0.97x | 672 MB/s | - | 0.53x | 366 MB/s | - | -| sharnd.out.2gb | 2.11x | 12639 MB/s | 0.01% | 1.98x | 11833 MB/s | 0.01% | -| (1 CPU) | 0.93x | 5594 MB/s | - | 1.34x | 8030 MB/s | - | -| [enwik9](http://mattmahoney.net/dc/textdata.html) | 19.34x | 8220 MB/s | 3.98% | 7.87x | 3345 MB/s | 15.82% | -| (1 CPU) | 1.06x | 452 MB/s | - | 0.50x | 213 MB/s | - | -| [silesia.tar](http://sun.aei.polsl.pl/~sdeor/corpus/silesia.zip) | 10.48x | 6124 MB/s | 5.67% | 3.76x | 2197 MB/s | 12.60% | -| (1 CPU) | 0.97x | 568 MB/s | - | 0.46x | 271 MB/s | - | -| [enwik10](https://encode.su/threads/3315-enwik10-benchmark-results) | 21.07x | 9020 MB/s | 6.36% | 6.91x | 2959 MB/s | 16.95% | -| (1 CPU) | 1.07x | 460 MB/s | - | 0.51x | 220 MB/s | - | +| File | S2 Speed | S2 Throughput | S2 % smaller | S2 "better" | "better" throughput | "better" % smaller | +|---------------------------------------------------------------------------------------------------------|----------|---------------|--------------|-------------|---------------------|--------------------| +| [rawstudio-mint14.tar](https://files.klauspost.com/compress/rawstudio-mint14.7z) | 16.33x | 10556 MB/s | 8.0% | 6.04x | 5252 MB/s | 14.7% | +| (1 CPU) | 1.08x | 940 MB/s | - | 0.46x | 400 MB/s | - | +| [github-june-2days-2019.json](https://files.klauspost.com/compress/github-june-2days-2019.json.zst) | 16.51x | 15224 MB/s | 31.70% | 9.47x | 8734 MB/s | 37.71% | +| (1 CPU) | 1.26x | 1157 MB/s | - | 0.60x | 556 MB/s | - | +| [github-ranks-backup.bin](https://files.klauspost.com/compress/github-ranks-backup.bin.zst) | 15.14x | 12598 MB/s | -5.76% | 6.23x | 5675 MB/s | 3.62% | +| (1 CPU) | 1.02x | 932 MB/s | - | 0.47x | 432 MB/s | - | +| [consensus.db.10gb](https://files.klauspost.com/compress/consensus.db.10gb.zst) | 11.21x | 12116 MB/s | 15.95% | 3.24x | 3500 MB/s | 18.00% | +| (1 CPU) | 1.05x | 1135 MB/s | - | 0.27x | 292 MB/s | - | +| [apache.log](https://files.klauspost.com/compress/apache.log.zst) | 8.55x | 16673 MB/s | 
20.54% | 5.85x | 11420 MB/s | 24.97% | +| (1 CPU) | 1.91x | 1771 MB/s | - | 0.53x | 1041 MB/s | - | +| [gob-stream](https://files.klauspost.com/compress/gob-stream.7z) | 15.76x | 14357 MB/s | 24.01% | 8.67x | 7891 MB/s | 33.68% | +| (1 CPU) | 1.17x | 1064 MB/s | - | 0.65x | 595 MB/s | - | +| [10gb.tar](http://mattmahoney.net/dc/10gb.html) | 13.33x | 9835 MB/s | 2.34% | 6.85x | 4863 MB/s | 9.96% | +| (1 CPU) | 0.97x | 689 MB/s | - | 0.55x | 387 MB/s | - | +| sharnd.out.2gb | 9.11x | 13213 MB/s | 0.01% | 1.49x | 9184 MB/s | 0.01% | +| (1 CPU) | 0.88x | 5418 MB/s | - | 0.77x | 5417 MB/s | - | +| [sofia-air-quality-dataset csv](https://files.klauspost.com/compress/sofia-air-quality-dataset.tar.zst) | 22.00x | 11477 MB/s | 18.73% | 11.15x | 5817 MB/s | 27.88% | +| (1 CPU) | 1.23x | 642 MB/s | - | 0.71x | 642 MB/s | - | +| [silesia.tar](http://sun.aei.polsl.pl/~sdeor/corpus/silesia.zip) | 11.23x | 6520 MB/s | 5.9% | 5.35x | 3109 MB/s | 15.88% | +| (1 CPU) | 1.05x | 607 MB/s | - | 0.52x | 304 MB/s | - | +| [enwik9](https://files.klauspost.com/compress/enwik9.zst) | 19.28x | 8440 MB/s | 4.04% | 9.31x | 4076 MB/s | 18.04% | +| (1 CPU) | 1.12x | 488 MB/s | - | 0.57x | 250 MB/s | - | ### Legend -* `S2 speed`: Speed of S2 compared to Snappy, using 16 cores and 1 core. -* `S2 throughput`: Throughput of S2 in MB/s. +* `S2 Speed`: Speed of S2 compared to Snappy, using 16 cores and 1 core. +* `S2 Throughput`: Throughput of S2 in MB/s. * `S2 % smaller`: How many percent of the Snappy output size is S2 better. * `S2 "better"`: Speed when enabling "better" compression mode in S2 compared to Snappy. * `"better" throughput`: Speed when enabling "better" compression mode in S2 compared to Snappy. @@ -361,7 +361,7 @@ Snappy vs S2 **compression** speed on 16 core (32 thread) computer, using all th There is a good speedup across the board when using a single thread and a significant speedup when using multiple threads. 
-Machine generated data gets by far the biggest compression boost, with size being being reduced by up to 45% of Snappy size. +Machine generated data gets by far the biggest compression boost, with size being reduced by up to 35% of Snappy size. The "better" compression mode sees a good improvement in all cases, but usually at a performance cost. @@ -404,15 +404,15 @@ The "better" compression mode will actively look for shorter matches, which is w Without assembly decompression is also very fast; single goroutine decompression speed. No assembly: | File | S2 Throughput | S2 throughput | -|--------------------------------|--------------|---------------| -| consensus.db.10gb.s2 | 1.84x | 2289.8 MB/s | -| 10gb.tar.s2 | 1.30x | 867.07 MB/s | -| rawstudio-mint14.tar.s2 | 1.66x | 1329.65 MB/s | -| github-june-2days-2019.json.s2 | 2.36x | 1831.59 MB/s | -| github-ranks-backup.bin.s2 | 1.73x | 1390.7 MB/s | -| enwik9.s2 | 1.67x | 681.53 MB/s | -| adresser.json.s2 | 3.41x | 4230.53 MB/s | -| silesia.tar.s2 | 1.52x | 811.58 | +|--------------------------------|---------------|---------------| +| consensus.db.10gb.s2 | 1.84x | 2289.8 MB/s | +| 10gb.tar.s2 | 1.30x | 867.07 MB/s | +| rawstudio-mint14.tar.s2 | 1.66x | 1329.65 MB/s | +| github-june-2days-2019.json.s2 | 2.36x | 1831.59 MB/s | +| github-ranks-backup.bin.s2 | 1.73x | 1390.7 MB/s | +| enwik9.s2 | 1.67x | 681.53 MB/s | +| adresser.json.s2 | 3.41x | 4230.53 MB/s | +| silesia.tar.s2 | 1.52x | 811.58 MB/s | Even though S2 typically compresses better than Snappy, decompression speed is always better. @@ -450,14 +450,14 @@ The most reliable is a wide dataset. For this we use [`webdevdata.org-2015-01-07-subset`](https://files.klauspost.com/compress/webdevdata.org-2015-01-07-4GB-subset.7z), 53927 files, total input size: 4,014,735,833 bytes. Single goroutine used. 
-| * | Input | Output | Reduction | MB/s | -|-------------------|------------|------------|-----------|--------| -| S2 | 4014735833 | 1059723369 | 73.60% | **934.34** | -| S2 Better | 4014735833 | 969670507 | 75.85% | 532.70 | -| S2 Best | 4014735833 | 906625668 | **77.85%** | 46.84 | -| Snappy | 4014735833 | 1128706759 | 71.89% | 762.59 | -| S2, Snappy Output | 4014735833 | 1093821420 | 72.75% | 908.60 | -| LZ4 | 4014735833 | 1079259294 | 73.12% | 526.94 | +| * | Input | Output | Reduction | MB/s | +|-------------------|------------|------------|------------|------------| +| S2 | 4014735833 | 1059723369 | 73.60% | **936.73** | +| S2 Better | 4014735833 | 961580539 | 76.05% | 451.10 | +| S2 Best | 4014735833 | 899182886 | **77.60%** | 46.84 | +| Snappy | 4014735833 | 1128706759 | 71.89% | 790.15 | +| S2, Snappy Output | 4014735833 | 1093823291 | 72.75% | 936.60 | +| LZ4 | 4014735833 | 1063768713 | 73.50% | 452.02 | S2 delivers both the best single threaded throughput with regular mode and the best compression rate with "best". "Better" mode provides the same compression speed as LZ4 with better compression ratio. @@ -489,42 +489,23 @@ AMD64 assembly is use for both S2 and Snappy. 
| Absolute Perf | Snappy size | S2 Size | Snappy Speed | S2 Speed | Snappy dec | S2 dec | |-----------------------|-------------|---------|--------------|-------------|-------------|-------------| -| html | 22843 | 21111 | 16246 MB/s | 17438 MB/s | 40972 MB/s | 49263 MB/s | -| urls.10K | 335492 | 287326 | 7943 MB/s | 9693 MB/s | 22523 MB/s | 26484 MB/s | -| fireworks.jpeg | 123034 | 123100 | 349544 MB/s | 273889 MB/s | 718321 MB/s | 827552 MB/s | -| fireworks.jpeg (200B) | 146 | 155 | 8869 MB/s | 17773 MB/s | 33691 MB/s | 52421 MB/s | -| paper-100k.pdf | 85304 | 84459 | 167546 MB/s | 101263 MB/s | 326905 MB/s | 291944 MB/s | -| html_x_4 | 92234 | 21113 | 15194 MB/s | 50670 MB/s | 30843 MB/s | 32217 MB/s | -| alice29.txt | 88034 | 85975 | 5936 MB/s | 6139 MB/s | 12882 MB/s | 20044 MB/s | -| asyoulik.txt | 77503 | 79650 | 5517 MB/s | 6366 MB/s | 12735 MB/s | 22806 MB/s | -| lcet10.txt | 234661 | 220670 | 6235 MB/s | 6067 MB/s | 14519 MB/s | 18697 MB/s | -| plrabn12.txt | 319267 | 317985 | 5159 MB/s | 5726 MB/s | 11923 MB/s | 19901 MB/s | -| geo.protodata | 23335 | 18690 | 21220 MB/s | 26529 MB/s | 56271 MB/s | 62540 MB/s | -| kppkn.gtb | 69526 | 65312 | 9732 MB/s | 8559 MB/s | 18491 MB/s | 18969 MB/s | -| alice29.txt (128B) | 80 | 82 | 6691 MB/s | 15489 MB/s | 31883 MB/s | 38874 MB/s | -| alice29.txt (1000B) | 774 | 774 | 12204 MB/s | 13000 MB/s | 48056 MB/s | 52341 MB/s | -| alice29.txt (10000B) | 6648 | 6933 | 10044 MB/s | 12806 MB/s | 32378 MB/s | 46322 MB/s | -| alice29.txt (20000B) | 12686 | 13574 | 7733 MB/s | 11210 MB/s | 30566 MB/s | 58969 MB/s | - - -| Relative Perf | Snappy size | S2 size improved | S2 Speed | S2 Dec Speed | -|-----------------------|-------------|------------------|----------|--------------| -| html | 22.31% | 7.58% | 1.07x | 1.20x | -| urls.10K | 47.78% | 14.36% | 1.22x | 1.18x | -| fireworks.jpeg | 99.95% | -0.05% | 0.78x | 1.15x | -| fireworks.jpeg (200B) | 73.00% | -6.16% | 2.00x | 1.56x | -| paper-100k.pdf | 83.30% | 0.99% | 0.60x | 
0.89x | -| html_x_4 | 22.52% | 77.11% | 3.33x | 1.04x | -| alice29.txt | 57.88% | 2.34% | 1.03x | 1.56x | -| asyoulik.txt | 61.91% | -2.77% | 1.15x | 1.79x | -| lcet10.txt | 54.99% | 5.96% | 0.97x | 1.29x | -| plrabn12.txt | 66.26% | 0.40% | 1.11x | 1.67x | -| geo.protodata | 19.68% | 19.91% | 1.25x | 1.11x | -| kppkn.gtb | 37.72% | 6.06% | 0.88x | 1.03x | -| alice29.txt (128B) | 62.50% | -2.50% | 2.31x | 1.22x | -| alice29.txt (1000B) | 77.40% | 0.00% | 1.07x | 1.09x | -| alice29.txt (10000B) | 66.48% | -4.29% | 1.27x | 1.43x | -| alice29.txt (20000B) | 63.43% | -7.00% | 1.45x | 1.93x | +| html | 22843 | 20868 | 16246 MB/s | 18617 MB/s | 40972 MB/s | 49263 MB/s | +| urls.10K | 335492 | 286541 | 7943 MB/s | 10201 MB/s | 22523 MB/s | 26484 MB/s | +| fireworks.jpeg | 123034 | 123100 | 349544 MB/s | 303228 MB/s | 718321 MB/s | 827552 MB/s | +| fireworks.jpeg (200B) | 146 | 155 | 8869 MB/s | 20180 MB/s | 33691 MB/s | 52421 MB/s | +| paper-100k.pdf | 85304 | 84202 | 167546 MB/s | 112988 MB/s | 326905 MB/s | 291944 MB/s | +| html_x_4 | 92234 | 20870 | 15194 MB/s | 54457 MB/s | 30843 MB/s | 32217 MB/s | +| alice29.txt | 88034 | 85934 | 5936 MB/s | 6540 MB/s | 12882 MB/s | 20044 MB/s | +| asyoulik.txt | 77503 | 79575 | 5517 MB/s | 6657 MB/s | 12735 MB/s | 22806 MB/s | +| lcet10.txt | 234661 | 220383 | 6235 MB/s | 6303 MB/s | 14519 MB/s | 18697 MB/s | +| plrabn12.txt | 319267 | 318196 | 5159 MB/s | 6074 MB/s | 11923 MB/s | 19901 MB/s | +| geo.protodata | 23335 | 18606 | 21220 MB/s | 25432 MB/s | 56271 MB/s | 62540 MB/s | +| kppkn.gtb | 69526 | 65019 | 9732 MB/s | 8905 MB/s | 18491 MB/s | 18969 MB/s | +| alice29.txt (128B) | 80 | 82 | 6691 MB/s | 17179 MB/s | 31883 MB/s | 38874 MB/s | +| alice29.txt (1000B) | 774 | 774 | 12204 MB/s | 13273 MB/s | 48056 MB/s | 52341 MB/s | +| alice29.txt (10000B) | 6648 | 6933 | 10044 MB/s | 12824 MB/s | 32378 MB/s | 46322 MB/s | +| alice29.txt (20000B) | 12686 | 13516 | 7733 MB/s | 12160 MB/s | 30566 MB/s | 58969 MB/s | + Speed is generally 
at or above Snappy. Small blocks gets a significant speedup, although at the expense of size. @@ -543,42 +524,23 @@ So individual benchmarks should only be seen as a guideline and the overall pict | Absolute Perf | Snappy size | Better Size | Snappy Speed | Better Speed | Snappy dec | Better dec | |-----------------------|-------------|-------------|--------------|--------------|-------------|-------------| -| html | 22843 | 19833 | 16246 MB/s | 7731 MB/s | 40972 MB/s | 40292 MB/s | -| urls.10K | 335492 | 253529 | 7943 MB/s | 3980 MB/s | 22523 MB/s | 20981 MB/s | -| fireworks.jpeg | 123034 | 123100 | 349544 MB/s | 9760 MB/s | 718321 MB/s | 823698 MB/s | -| fireworks.jpeg (200B) | 146 | 142 | 8869 MB/s | 594 MB/s | 33691 MB/s | 30101 MB/s | -| paper-100k.pdf | 85304 | 82915 | 167546 MB/s | 7470 MB/s | 326905 MB/s | 198869 MB/s | -| html_x_4 | 92234 | 19841 | 15194 MB/s | 23403 MB/s | 30843 MB/s | 30937 MB/s | -| alice29.txt | 88034 | 73218 | 5936 MB/s | 2945 MB/s | 12882 MB/s | 16611 MB/s | -| asyoulik.txt | 77503 | 66844 | 5517 MB/s | 2739 MB/s | 12735 MB/s | 14975 MB/s | -| lcet10.txt | 234661 | 190589 | 6235 MB/s | 3099 MB/s | 14519 MB/s | 16634 MB/s | -| plrabn12.txt | 319267 | 270828 | 5159 MB/s | 2600 MB/s | 11923 MB/s | 13382 MB/s | -| geo.protodata | 23335 | 18278 | 21220 MB/s | 11208 MB/s | 56271 MB/s | 57961 MB/s | -| kppkn.gtb | 69526 | 61851 | 9732 MB/s | 4556 MB/s | 18491 MB/s | 16524 MB/s | -| alice29.txt (128B) | 80 | 81 | 6691 MB/s | 529 MB/s | 31883 MB/s | 34225 MB/s | -| alice29.txt (1000B) | 774 | 748 | 12204 MB/s | 1943 MB/s | 48056 MB/s | 42068 MB/s | -| alice29.txt (10000B) | 6648 | 6234 | 10044 MB/s | 2949 MB/s | 32378 MB/s | 28813 MB/s | -| alice29.txt (20000B) | 12686 | 11584 | 7733 MB/s | 2822 MB/s | 30566 MB/s | 27315 MB/s | - - -| Relative Perf | Snappy size | Better size | Better Speed | Better dec | -|-----------------------|-------------|-------------|--------------|------------| -| html | 22.31% | 13.18% | 0.48x | 0.98x | -| urls.10K 
| 47.78% | 24.43% | 0.50x | 0.93x | -| fireworks.jpeg | 99.95% | -0.05% | 0.03x | 1.15x | -| fireworks.jpeg (200B) | 73.00% | 2.74% | 0.07x | 0.89x | -| paper-100k.pdf | 83.30% | 2.80% | 0.07x | 0.61x | -| html_x_4 | 22.52% | 78.49% | 0.04x | 1.00x | -| alice29.txt | 57.88% | 16.83% | 1.54x | 1.29x | -| asyoulik.txt | 61.91% | 13.75% | 0.50x | 1.18x | -| lcet10.txt | 54.99% | 18.78% | 0.50x | 1.15x | -| plrabn12.txt | 66.26% | 15.17% | 0.50x | 1.12x | -| geo.protodata | 19.68% | 21.67% | 0.50x | 1.03x | -| kppkn.gtb | 37.72% | 11.04% | 0.53x | 0.89x | -| alice29.txt (128B) | 62.50% | -1.25% | 0.47x | 1.07x | -| alice29.txt (1000B) | 77.40% | 3.36% | 0.08x | 0.88x | -| alice29.txt (10000B) | 66.48% | 6.23% | 0.16x | 0.89x | -| alice29.txt (20000B) | 63.43% | 8.69% | 0.29x | 0.89x | +| html | 22843 | 18972 | 16246 MB/s | 8621 MB/s | 40972 MB/s | 40292 MB/s | +| urls.10K | 335492 | 248079 | 7943 MB/s | 5104 MB/s | 22523 MB/s | 20981 MB/s | +| fireworks.jpeg | 123034 | 123100 | 349544 MB/s | 84429 MB/s | 718321 MB/s | 823698 MB/s | +| fireworks.jpeg (200B) | 146 | 149 | 8869 MB/s | 7125 MB/s | 33691 MB/s | 30101 MB/s | +| paper-100k.pdf | 85304 | 82887 | 167546 MB/s | 11087 MB/s | 326905 MB/s | 198869 MB/s | +| html_x_4 | 92234 | 18982 | 15194 MB/s | 29316 MB/s | 30843 MB/s | 30937 MB/s | +| alice29.txt | 88034 | 71611 | 5936 MB/s | 3709 MB/s | 12882 MB/s | 16611 MB/s | +| asyoulik.txt | 77503 | 65941 | 5517 MB/s | 3380 MB/s | 12735 MB/s | 14975 MB/s | +| lcet10.txt | 234661 | 184939 | 6235 MB/s | 3537 MB/s | 14519 MB/s | 16634 MB/s | +| plrabn12.txt | 319267 | 264990 | 5159 MB/s | 2960 MB/s | 11923 MB/s | 13382 MB/s | +| geo.protodata | 23335 | 17689 | 21220 MB/s | 10859 MB/s | 56271 MB/s | 57961 MB/s | +| kppkn.gtb | 69526 | 55398 | 9732 MB/s | 5206 MB/s | 18491 MB/s | 16524 MB/s | +| alice29.txt (128B) | 80 | 78 | 6691 MB/s | 7422 MB/s | 31883 MB/s | 34225 MB/s | +| alice29.txt (1000B) | 774 | 746 | 12204 MB/s | 5734 MB/s | 48056 MB/s | 42068 MB/s | +| alice29.txt 
(10000B) | 6648 | 6218 | 10044 MB/s | 6055 MB/s | 32378 MB/s | 28813 MB/s | +| alice29.txt (20000B) | 12686 | 11492 | 7733 MB/s | 3143 MB/s | 30566 MB/s | 27315 MB/s | + Except for the mostly incompressible JPEG image compression is better and usually in the double digits in terms of percentage reduction over Snappy. @@ -605,29 +567,29 @@ Some examples compared on 16 core CPU, amd64 assembly used: ``` * enwik10 -Default... 10000000000 -> 4761467548 [47.61%]; 1.098s, 8685.6MB/s -Better... 10000000000 -> 4219438251 [42.19%]; 1.925s, 4954.2MB/s -Best... 10000000000 -> 3627364337 [36.27%]; 43.051s, 221.5MB/s +Default... 10000000000 -> 4759950115 [47.60%]; 1.03s, 9263.0MB/s +Better... 10000000000 -> 4084706676 [40.85%]; 2.16s, 4415.4MB/s +Best... 10000000000 -> 3615520079 [36.16%]; 42.259s, 225.7MB/s * github-june-2days-2019.json -Default... 6273951764 -> 1043196283 [16.63%]; 431ms, 13882.3MB/s -Better... 6273951764 -> 949146808 [15.13%]; 547ms, 10938.4MB/s -Best... 6273951764 -> 832855506 [13.27%]; 9.455s, 632.8MB/s +Default... 6273951764 -> 1041700255 [16.60%]; 431ms, 13882.3MB/s +Better... 6273951764 -> 945841238 [15.08%]; 547ms, 10938.4MB/s +Best... 6273951764 -> 826392576 [13.17%]; 9.455s, 632.8MB/s * nyc-taxi-data-10M.csv -Default... 3325605752 -> 1095998837 [32.96%]; 324ms, 9788.7MB/s -Better... 3325605752 -> 954776589 [28.71%]; 491ms, 6459.4MB/s -Best... 3325605752 -> 779098746 [23.43%]; 8.29s, 382.6MB/s +Default... 3325605752 -> 1093516949 [32.88%]; 324ms, 9788.7MB/s +Better... 3325605752 -> 885394158 [26.62%]; 491ms, 6459.4MB/s +Best... 3325605752 -> 773681257 [23.26%]; 8.29s, 412.0MB/s * 10gb.tar -Default... 10065157632 -> 5916578242 [58.78%]; 1.028s, 9337.4MB/s -Better... 10065157632 -> 5649207485 [56.13%]; 1.597s, 6010.6MB/s -Best... 10065157632 -> 5208719802 [51.75%]; 32.78s, 292.8MB/ +Default... 10065157632 -> 5915541066 [58.77%]; 1.028s, 9337.4MB/s +Better... 10065157632 -> 5453844650 [54.19%]; 1.597s, 4862.7MB/s +Best... 
10065157632 -> 5192495021 [51.59%]; 32.78s, 308.2MB/s * consensus.db.10gb -Default... 10737418240 -> 4562648848 [42.49%]; 882ms, 11610.0MB/s -Better... 10737418240 -> 4542428129 [42.30%]; 1.533s, 6679.7MB/s -Best... 10737418240 -> 4244773384 [39.53%]; 42.96s, 238.4MB/s +Default... 10737418240 -> 4549762344 [42.37%]; 882ms, 12118.4MB/s +Better... 10737418240 -> 4438535064 [41.34%]; 1.533s, 3500.9MB/s +Best... 10737418240 -> 4210602774 [39.21%]; 42.96s, 254.4MB/s ``` Decompression speed should be around the same as using the 'better' compression mode. @@ -648,10 +610,10 @@ If you would like more control, you can use the s2 package as described below: Snappy compatible blocks can be generated with the S2 encoder. Compression and speed is typically a bit better `MaxEncodedLen` is also smaller for smaller memory usage. Replace -| Snappy | S2 replacement | -|----------------------------|-------------------------| -| snappy.Encode(...) | s2.EncodeSnappy(...) | -| snappy.MaxEncodedLen(...) | s2.MaxEncodedLen(...) | +| Snappy | S2 replacement | +|---------------------------|-----------------------| +| snappy.Encode(...) | s2.EncodeSnappy(...) | +| snappy.MaxEncodedLen(...) | s2.MaxEncodedLen(...) | `s2.EncodeSnappy` can be replaced with `s2.EncodeSnappyBetter` or `s2.EncodeSnappyBest` to get more efficiently compressed snappy compatible output. @@ -660,12 +622,12 @@ Compression and speed is typically a bit better `MaxEncodedLen` is also smaller Comparison of [`webdevdata.org-2015-01-07-subset`](https://files.klauspost.com/compress/webdevdata.org-2015-01-07-4GB-subset.7z), 53927 files, total input size: 4,014,735,833 bytes. 
amd64, single goroutine used: -| Encoder | Size | MB/s | Reduction | -|-----------------------|------------|------------|------------ -| snappy.Encode | 1128706759 | 725.59 | 71.89% | -| s2.EncodeSnappy | 1093823291 | **899.16** | 72.75% | -| s2.EncodeSnappyBetter | 1001158548 | 578.49 | 75.06% | -| s2.EncodeSnappyBest | 944507998 | 66.00 | **76.47%**| +| Encoder | Size | MB/s | Reduction | +|-----------------------|------------|------------|------------| +| snappy.Encode | 1128706759 | 725.59 | 71.89% | +| s2.EncodeSnappy | 1093823291 | **899.16** | 72.75% | +| s2.EncodeSnappyBetter | 1001158548 | 578.49 | 75.06% | +| s2.EncodeSnappyBest | 944507998 | 66.00 | **76.47%** | ## Streams @@ -851,20 +813,20 @@ The block can be read from the front, but contains information so it can be read Numbers are stored as fixed size little endian values or [zigzag encoded](https://developers.google.com/protocol-buffers/docs/encoding#signed_integers) [base 128 varints](https://developers.google.com/protocol-buffers/docs/encoding), with un-encoded value length of 64 bits, unless other limits are specified. -| Content | Format | -|---------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------| -| ID, `[1]byte` | Always 0x99. | -| Data Length, `[3]byte` | 3 byte little-endian length of the chunk in bytes, following this. | -| Header `[6]byte` | Header, must be `[115, 50, 105, 100, 120, 0]` or in text: "s2idx\x00". | -| UncompressedSize, Varint | Total Uncompressed size. | -| CompressedSize, Varint | Total Compressed size if known. Should be -1 if unknown. | -| EstBlockSize, Varint | Block Size, used for guessing uncompressed offsets. Must be >= 0. | -| Entries, Varint | Number of Entries in index, must be < 65536 and >=0. | -| HasUncompressedOffsets `byte` | 0 if no uncompressed offsets are present, 1 if present. Other values are invalid. 
| -| UncompressedOffsets, [Entries]VarInt | Uncompressed offsets. See below how to decode. | -| CompressedOffsets, [Entries]VarInt | Compressed offsets. See below how to decode. | -| Block Size, `[4]byte` | Little Endian total encoded size (including header and trailer). Can be used for searching backwards to start of block. | -| Trailer `[6]byte` | Trailer, must be `[0, 120, 100, 105, 50, 115]` or in text: "\x00xdi2s". Can be used for identifying block from end of stream. | +| Content | Format | +|--------------------------------------|-------------------------------------------------------------------------------------------------------------------------------| +| ID, `[1]byte` | Always 0x99. | +| Data Length, `[3]byte` | 3 byte little-endian length of the chunk in bytes, following this. | +| Header `[6]byte` | Header, must be `[115, 50, 105, 100, 120, 0]` or in text: "s2idx\x00". | +| UncompressedSize, Varint | Total Uncompressed size. | +| CompressedSize, Varint | Total Compressed size if known. Should be -1 if unknown. | +| EstBlockSize, Varint | Block Size, used for guessing uncompressed offsets. Must be >= 0. | +| Entries, Varint | Number of Entries in index, must be < 65536 and >=0. | +| HasUncompressedOffsets `byte` | 0 if no uncompressed offsets are present, 1 if present. Other values are invalid. | +| UncompressedOffsets, [Entries]VarInt | Uncompressed offsets. See below how to decode. | +| CompressedOffsets, [Entries]VarInt | Compressed offsets. See below how to decode. | +| Block Size, `[4]byte` | Little Endian total encoded size (including header and trailer). Can be used for searching backwards to start of block. | +| Trailer `[6]byte` | Trailer, must be `[0, 120, 100, 105, 50, 115]` or in text: "\x00xdi2s". Can be used for identifying block from end of stream. 
| For regular streams the uncompressed offsets are fully predictable, so `HasUncompressedOffsets` allows to specify that compressed blocks all have diff --git a/s2/s2_test.go b/s2/s2_test.go index ab2dafd1aa..bfff1535b2 100644 --- a/s2/s2_test.go +++ b/s2/s2_test.go @@ -1676,8 +1676,6 @@ func benchFile(b *testing.B, i int, decode bool) { } } }) - b.ReportMetric(100*float64(len(Encode(nil, data)))/float64(len(data)), "pct") - } else { b.SetBytes(int64(len(data))) b.ReportAllocs() @@ -1700,6 +1698,7 @@ func benchFile(b *testing.B, i int, decode bool) { }) } b.ReportMetric(100*float64(len(Encode(nil, data)))/float64(len(data)), "pct") + b.ReportMetric(float64(len(Encode(nil, data))), "B") }) b.Run("block-better", func(b *testing.B) { if decode { @@ -1717,7 +1716,6 @@ func benchFile(b *testing.B, i int, decode bool) { } } }) - b.ReportMetric(100*float64(len(EncodeBetter(nil, data)))/float64(len(data)), "pct") } else { b.SetBytes(int64(len(data))) b.ReportAllocs() @@ -1738,8 +1736,9 @@ func benchFile(b *testing.B, i int, decode bool) { } } }) - b.ReportMetric(100*float64(len(EncodeBetter(nil, data)))/float64(len(data)), "pct") } + b.ReportMetric(100*float64(len(EncodeBetter(nil, data)))/float64(len(data)), "pct") + b.ReportMetric(float64(len(EncodeBetter(nil, data))), "B") }) b.Run("block-best", func(b *testing.B) { @@ -1781,6 +1780,7 @@ func benchFile(b *testing.B, i int, decode bool) { }) b.ReportMetric(100*float64(len(EncodeBest(nil, data)))/float64(len(data)), "pct") } + b.ReportMetric(float64(len(EncodeBest(nil, data))), "B") }) } @@ -1827,6 +1827,7 @@ func benchFileSnappy(b *testing.B, i int, decode bool) { }) b.ReportMetric(100*float64(len(EncodeSnappy(nil, data)))/float64(len(data)), "pct") } + b.ReportMetric(float64(len(EncodeSnappy(nil, data))), "B") }) b.Run("s2-snappy-better", func(b *testing.B) { @@ -1870,6 +1871,7 @@ func benchFileSnappy(b *testing.B, i int, decode bool) { }) b.ReportMetric(100*float64(len(EncodeSnappyBetter(nil, 
data)))/float64(len(data)), "pct") } + b.ReportMetric(float64(len(EncodeSnappyBetter(nil, data))), "B") }) b.Run("s2-snappy-best", func(b *testing.B) { @@ -1911,6 +1913,7 @@ func benchFileSnappy(b *testing.B, i int, decode bool) { }) b.ReportMetric(100*float64(len(EncodeSnappyBest(nil, data)))/float64(len(data)), "pct") } + b.ReportMetric(float64(len(EncodeSnappyBest(nil, data))), "B") }) b.Run("snappy-noasm", func(b *testing.B) { if decode {