From 91d9d326ec882f0c3cabffe16a8222fb057c4874 Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Sat, 12 Feb 2022 04:32:57 -0800 Subject: [PATCH] s2: Fix GOAMD64_v3 (#492) There is no #elifdef - it just seems to be ignored. Use workaround for emitting. --- s2/_generate/gen.go | 8 +- s2/encodeblock_amd64.s | 310 ++++++++++++++++++++++++++++++++++++----- 2 files changed, 286 insertions(+), 32 deletions(-) diff --git a/s2/_generate/gen.go b/s2/_generate/gen.go index 47d58ea1e6..9ea8eeded0 100644 --- a/s2/_generate/gen.go +++ b/s2/_generate/gen.go @@ -2507,8 +2507,14 @@ func (o options) matchLen(name string, a, b, len reg.GPVirtual, end LabelRef) re // 2016 BMI :TZCNT r64, r64 L: 0.57ns= 2.0c T: 0.29ns= 1.00c // 315 AMD64 :BSF r64, r64 L: 0.88ns= 3.1c T: 0.86ns= 3.00c TZCNTQ(tmp, tmp) - Comment("#elifdef GOAMD64_v4") + Comment("#define TZCNTQ_EMITTED 1") + Comment("#endif\n") + Comment("#ifdef GOAMD64_v4") TZCNTQ(tmp, tmp) + Comment("#define TZCNTQ_EMITTED 1") + Comment("#endif\n") + Comment("#ifdef TZCNTQ_EMITTED") + Comment("#undef TZCNTQ_EMITTED") Comment("#else") BSFQ(tmp, tmp) Comment("#endif") diff --git a/s2/encodeblock_amd64.s b/s2/encodeblock_amd64.s index cc35dce373..729dbf536c 100644 --- a/s2/encodeblock_amd64.s +++ b/s2/encodeblock_amd64.s @@ -253,9 +253,17 @@ matchlen_loopback_repeat_extend_encodeBlockAsm: #ifdef GOAMD64_v3 TZCNTQ R11, R11 -#elifdef GOAMD64_v4 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef GOAMD64_v4 TZCNTQ R11, R11 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef TZCNTQ_EMITTED +#undef TZCNTQ_EMITTED #else BSFQ R11, R11 @@ -783,9 +791,17 @@ matchlen_loopback_match_nolit_encodeBlockAsm: #ifdef GOAMD64_v3 TZCNTQ R9, R9 -#elifdef GOAMD64_v4 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef GOAMD64_v4 TZCNTQ R9, R9 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef TZCNTQ_EMITTED +#undef TZCNTQ_EMITTED #else BSFQ R9, R9 @@ -1450,9 +1466,17 @@ matchlen_loopback_repeat_extend_encodeBlockAsm4MB: #ifdef GOAMD64_v3 TZCNTQ R11, R11 -#elifdef GOAMD64_v4 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef GOAMD64_v4 TZCNTQ R11, R11 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef TZCNTQ_EMITTED +#undef TZCNTQ_EMITTED #else BSFQ R11, R11 @@ -1939,9 +1963,17 @@ matchlen_loopback_match_nolit_encodeBlockAsm4MB: #ifdef GOAMD64_v3 TZCNTQ R9, R9 -#elifdef GOAMD64_v4 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef GOAMD64_v4 TZCNTQ R9, R9 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef TZCNTQ_EMITTED +#undef TZCNTQ_EMITTED #else BSFQ R9, R9 @@ -2565,9 +2597,17 @@ matchlen_loopback_repeat_extend_encodeBlockAsm12B: #ifdef GOAMD64_v3 TZCNTQ R11, R11 -#elifdef GOAMD64_v4 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef GOAMD64_v4 TZCNTQ R11, R11 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef TZCNTQ_EMITTED +#undef TZCNTQ_EMITTED #else BSFQ R11, R11 @@ -2939,9 +2979,17 @@ matchlen_loopback_match_nolit_encodeBlockAsm12B: #ifdef GOAMD64_v3 TZCNTQ R9, R9 -#elifdef GOAMD64_v4 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef GOAMD64_v4 TZCNTQ R9, R9 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef TZCNTQ_EMITTED +#undef TZCNTQ_EMITTED #else BSFQ R9, R9 @@ -3462,9 +3510,17 @@ matchlen_loopback_repeat_extend_encodeBlockAsm10B: #ifdef GOAMD64_v3 TZCNTQ R11, R11 -#elifdef GOAMD64_v4 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef GOAMD64_v4 TZCNTQ R11, R11 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef TZCNTQ_EMITTED +#undef TZCNTQ_EMITTED #else BSFQ R11, R11 @@ -3836,9 +3892,17 @@ matchlen_loopback_match_nolit_encodeBlockAsm10B: #ifdef GOAMD64_v3 TZCNTQ R9, R9 -#elifdef GOAMD64_v4 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef GOAMD64_v4 TZCNTQ R9, R9 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef TZCNTQ_EMITTED +#undef TZCNTQ_EMITTED #else BSFQ R9, R9 @@ -4359,9 +4423,17 @@ matchlen_loopback_repeat_extend_encodeBlockAsm8B: #ifdef GOAMD64_v3 TZCNTQ R11, R11 -#elifdef GOAMD64_v4 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef GOAMD64_v4 TZCNTQ R11, R11 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef TZCNTQ_EMITTED +#undef TZCNTQ_EMITTED #else BSFQ R11, R11 @@ -4723,9 +4795,17 @@ matchlen_loopback_match_nolit_encodeBlockAsm8B: #ifdef GOAMD64_v3 TZCNTQ R9, R9 -#elifdef GOAMD64_v4 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef GOAMD64_v4 TZCNTQ R9, R9 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef TZCNTQ_EMITTED +#undef TZCNTQ_EMITTED #else BSFQ R9, R9 @@ -5145,9 +5225,17 @@ matchlen_loopback_match_nolit_encodeBetterBlockAsm: #ifdef GOAMD64_v3 TZCNTQ R11, R11 -#elifdef GOAMD64_v4 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef GOAMD64_v4 TZCNTQ R11, R11 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef TZCNTQ_EMITTED +#undef TZCNTQ_EMITTED #else BSFQ R11, R11 @@ -6126,9 +6214,17 @@ matchlen_loopback_match_nolit_encodeBetterBlockAsm4MB: #ifdef GOAMD64_v3 TZCNTQ R11, R11 -#elifdef GOAMD64_v4 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef GOAMD64_v4 TZCNTQ R11, R11 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef TZCNTQ_EMITTED +#undef TZCNTQ_EMITTED #else BSFQ R11, R11 @@ -7042,9 +7138,17 @@ matchlen_loopback_match_nolit_encodeBetterBlockAsm12B: #ifdef GOAMD64_v3 TZCNTQ R11, R11 -#elifdef GOAMD64_v4 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef GOAMD64_v4 TZCNTQ R11, R11 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef TZCNTQ_EMITTED +#undef TZCNTQ_EMITTED #else BSFQ R11, R11 @@ -7812,9 +7916,17 @@ matchlen_loopback_match_nolit_encodeBetterBlockAsm10B: #ifdef GOAMD64_v3 TZCNTQ R11, R11 -#elifdef GOAMD64_v4 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef GOAMD64_v4 TZCNTQ R11, R11 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef TZCNTQ_EMITTED +#undef TZCNTQ_EMITTED #else BSFQ R11, R11 @@ -8582,9 +8694,17 @@ matchlen_loopback_match_nolit_encodeBetterBlockAsm8B: #ifdef GOAMD64_v3 TZCNTQ R11, R11 -#elifdef GOAMD64_v4 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef GOAMD64_v4 TZCNTQ R11, R11 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef TZCNTQ_EMITTED +#undef TZCNTQ_EMITTED #else BSFQ R11, R11 @@ -9464,9 +9584,17 @@ matchlen_loopback_repeat_extend_encodeSnappyBlockAsm: #ifdef GOAMD64_v3 TZCNTQ R10, R10 -#elifdef GOAMD64_v4 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef GOAMD64_v4 TZCNTQ R10, R10 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef TZCNTQ_EMITTED +#undef TZCNTQ_EMITTED #else BSFQ R10, R10 @@ -9790,9 +9918,17 @@ matchlen_loopback_match_nolit_encodeSnappyBlockAsm: #ifdef GOAMD64_v3 TZCNTQ R9, R9 -#elifdef GOAMD64_v4 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef GOAMD64_v4 TZCNTQ R9, R9 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef TZCNTQ_EMITTED +#undef TZCNTQ_EMITTED #else BSFQ R9, R9 @@ -10312,9 +10448,17 @@ matchlen_loopback_repeat_extend_encodeSnappyBlockAsm64K: #ifdef GOAMD64_v3 TZCNTQ R10, R10 -#elifdef GOAMD64_v4 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef GOAMD64_v4 TZCNTQ R10, R10 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef TZCNTQ_EMITTED +#undef TZCNTQ_EMITTED #else BSFQ R10, R10 @@ -10595,9 +10739,17 @@ matchlen_loopback_match_nolit_encodeSnappyBlockAsm64K: #ifdef GOAMD64_v3 TZCNTQ R9, R9 -#elifdef GOAMD64_v4 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef GOAMD64_v4 TZCNTQ R9, R9 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef TZCNTQ_EMITTED +#undef TZCNTQ_EMITTED #else BSFQ R9, R9 @@ -11074,9 +11226,17 @@ matchlen_loopback_repeat_extend_encodeSnappyBlockAsm12B: #ifdef GOAMD64_v3 TZCNTQ R10, R10 -#elifdef GOAMD64_v4 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef GOAMD64_v4 TZCNTQ R10, R10 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef TZCNTQ_EMITTED +#undef TZCNTQ_EMITTED #else BSFQ R10, R10 @@ -11357,9 +11517,17 @@ matchlen_loopback_match_nolit_encodeSnappyBlockAsm12B: #ifdef GOAMD64_v3 TZCNTQ R9, R9 -#elifdef GOAMD64_v4 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef GOAMD64_v4 TZCNTQ R9, R9 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef TZCNTQ_EMITTED +#undef TZCNTQ_EMITTED #else BSFQ R9, R9 @@ -11836,9 +12004,17 @@ matchlen_loopback_repeat_extend_encodeSnappyBlockAsm10B: #ifdef GOAMD64_v3 TZCNTQ R10, R10 -#elifdef GOAMD64_v4 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef GOAMD64_v4 TZCNTQ R10, R10 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef TZCNTQ_EMITTED +#undef TZCNTQ_EMITTED #else BSFQ R10, R10 @@ -12119,9 +12295,17 @@ matchlen_loopback_match_nolit_encodeSnappyBlockAsm10B: #ifdef GOAMD64_v3 TZCNTQ R9, R9 -#elifdef GOAMD64_v4 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef GOAMD64_v4 TZCNTQ R9, R9 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef TZCNTQ_EMITTED +#undef TZCNTQ_EMITTED #else BSFQ R9, R9 @@ -12598,9 +12782,17 @@ matchlen_loopback_repeat_extend_encodeSnappyBlockAsm8B: #ifdef GOAMD64_v3 TZCNTQ R10, R10 -#elifdef GOAMD64_v4 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef GOAMD64_v4 TZCNTQ R10, R10 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef TZCNTQ_EMITTED +#undef TZCNTQ_EMITTED #else BSFQ R10, R10 @@ -12879,9 +13071,17 @@ matchlen_loopback_match_nolit_encodeSnappyBlockAsm8B: #ifdef GOAMD64_v3 TZCNTQ R9, R9 -#elifdef GOAMD64_v4 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef GOAMD64_v4 TZCNTQ R9, R9 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef TZCNTQ_EMITTED +#undef TZCNTQ_EMITTED #else BSFQ R9, R9 @@ -13261,9 +13461,17 @@ matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm: #ifdef GOAMD64_v3 TZCNTQ R11, R11 -#elifdef GOAMD64_v4 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef GOAMD64_v4 TZCNTQ R11, R11 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef TZCNTQ_EMITTED +#undef TZCNTQ_EMITTED #else BSFQ R11, R11 @@ -13860,9 +14068,17 @@ matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm64K: #ifdef GOAMD64_v3 TZCNTQ R11, R11 -#elifdef GOAMD64_v4 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef GOAMD64_v4 TZCNTQ R11, R11 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef TZCNTQ_EMITTED +#undef TZCNTQ_EMITTED #else BSFQ R11, R11 @@ -14388,9 +14604,17 @@ matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm12B: #ifdef GOAMD64_v3 TZCNTQ R11, R11 -#elifdef GOAMD64_v4 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef GOAMD64_v4 TZCNTQ R11, R11 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef TZCNTQ_EMITTED +#undef TZCNTQ_EMITTED #else BSFQ R11, R11 @@ -14916,9 +15140,17 @@ matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm10B: #ifdef GOAMD64_v3 TZCNTQ R11, R11 -#elifdef GOAMD64_v4 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef GOAMD64_v4 TZCNTQ R11, R11 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef TZCNTQ_EMITTED +#undef TZCNTQ_EMITTED #else BSFQ R11, R11 @@ -15444,9 +15676,17 @@ matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm8B: #ifdef GOAMD64_v3 TZCNTQ R11, R11 -#elifdef GOAMD64_v4 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef GOAMD64_v4 TZCNTQ R11, R11 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef TZCNTQ_EMITTED +#undef TZCNTQ_EMITTED #else BSFQ R11, R11 @@ -16404,9 +16644,17 @@ matchlen_loopback_standalone: #ifdef GOAMD64_v3 TZCNTQ BX, BX -#elifdef GOAMD64_v4 +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef GOAMD64_v4 TZCNTQ BX, BX +#define TZCNTQ_EMITTED 1 +#endif + +#ifdef TZCNTQ_EMITTED +#undef TZCNTQ_EMITTED #else BSFQ BX, BX