-
Notifications
You must be signed in to change notification settings - Fork 493
/
0060-AArch64-Add-memset_zva64.patch
228 lines (216 loc) · 7.66 KB
/
0060-AArch64-Add-memset_zva64.patch
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
From 156e44845f4137d6d3ea6c2824dd459652a7efda Mon Sep 17 00:00:00 2001
From: Wilco Dijkstra <wilco.dijkstra@arm.com>
Date: Thu, 26 Oct 2023 17:07:21 +0100
Subject: [PATCH] AArch64: Add memset_zva64
Add a specialized memset for the common ZVA size of 64 bytes to avoid the
overhead of reading the ZVA size from DCZID_EL0 on each call. Since the
resulting code is identical to __memset_falkor, remove the latter.
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
(cherry picked from commit 3d7090f14b13312320e425b27dcf0fe72de026fd)
---
sysdeps/aarch64/memset.S | 10 ++--
sysdeps/aarch64/multiarch/Makefile | 2 +-
sysdeps/aarch64/multiarch/ifunc-impl-list.c | 4 +-
sysdeps/aarch64/multiarch/memset.c | 9 ++--
sysdeps/aarch64/multiarch/memset_falkor.S | 54 ---------------------
sysdeps/aarch64/multiarch/memset_zva64.S | 27 +++++++++++
6 files changed, 38 insertions(+), 68 deletions(-)
delete mode 100644 sysdeps/aarch64/multiarch/memset_falkor.S
create mode 100644 sysdeps/aarch64/multiarch/memset_zva64.S
diff --git a/sysdeps/aarch64/memset.S b/sysdeps/aarch64/memset.S
index bf3cf85c8a..bbfb7184c3 100644
--- a/sysdeps/aarch64/memset.S
+++ b/sysdeps/aarch64/memset.S
@@ -101,19 +101,19 @@ L(tail64):
ret
L(try_zva):
-#ifdef ZVA_MACRO
- zva_macro
-#else
+#ifndef ZVA64_ONLY
.p2align 3
mrs tmp1, dczid_el0
tbnz tmp1w, 4, L(no_zva)
and tmp1w, tmp1w, 15
cmp tmp1w, 4 /* ZVA size is 64 bytes. */
b.ne L(zva_128)
-
+ nop
+#endif
/* Write the first and last 64 byte aligned block using stp rather
than using DC ZVA. This is faster on some cores.
*/
+ .p2align 4
L(zva_64):
str q0, [dst, 16]
stp q0, q0, [dst, 32]
@@ -123,7 +123,6 @@ L(zva_64):
sub count, dstend, dst /* Count is now 128 too large. */
sub count, count, 128+64+64 /* Adjust count and bias for loop. */
add dst, dst, 128
- nop
1: dc zva, dst
add dst, dst, 64
subs count, count, 64
@@ -134,6 +133,7 @@ L(zva_64):
stp q0, q0, [dstend, -32]
ret
+#ifndef ZVA64_ONLY
.p2align 3
L(zva_128):
cmp tmp1w, 5 /* ZVA size is 128 bytes. */
diff --git a/sysdeps/aarch64/multiarch/Makefile b/sysdeps/aarch64/multiarch/Makefile
index a1a4de3cd9..171ca5e4cf 100644
--- a/sysdeps/aarch64/multiarch/Makefile
+++ b/sysdeps/aarch64/multiarch/Makefile
@@ -12,10 +12,10 @@ sysdep_routines += \
memmove_mops \
memset_a64fx \
memset_emag \
- memset_falkor \
memset_generic \
memset_kunpeng \
memset_mops \
+ memset_zva64 \
strlen_asimd \
strlen_generic \
# sysdep_routines
diff --git a/sysdeps/aarch64/multiarch/ifunc-impl-list.c b/sysdeps/aarch64/multiarch/ifunc-impl-list.c
index 3596d3c8d3..fdd9ea9246 100644
--- a/sysdeps/aarch64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/aarch64/multiarch/ifunc-impl-list.c
@@ -54,9 +54,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, memmove, mops, __memmove_mops)
IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_generic))
IFUNC_IMPL (i, name, memset,
- /* Enable this on non-falkor processors too so that other cores
- can do a comparative analysis with __memset_generic. */
- IFUNC_IMPL_ADD (array, i, memset, (zva_size == 64), __memset_falkor)
+ IFUNC_IMPL_ADD (array, i, memset, (zva_size == 64), __memset_zva64)
IFUNC_IMPL_ADD (array, i, memset, 1, __memset_emag)
IFUNC_IMPL_ADD (array, i, memset, 1, __memset_kunpeng)
#if HAVE_AARCH64_SVE_ASM
diff --git a/sysdeps/aarch64/multiarch/memset.c b/sysdeps/aarch64/multiarch/memset.c
index 9193b197dd..6deb6865e5 100644
--- a/sysdeps/aarch64/multiarch/memset.c
+++ b/sysdeps/aarch64/multiarch/memset.c
@@ -28,7 +28,7 @@
extern __typeof (__redirect_memset) __libc_memset;
-extern __typeof (__redirect_memset) __memset_falkor attribute_hidden;
+extern __typeof (__redirect_memset) __memset_zva64 attribute_hidden;
extern __typeof (__redirect_memset) __memset_emag attribute_hidden;
extern __typeof (__redirect_memset) __memset_kunpeng attribute_hidden;
extern __typeof (__redirect_memset) __memset_a64fx attribute_hidden;
@@ -47,18 +47,17 @@ select_memset_ifunc (void)
{
if (IS_A64FX (midr) && zva_size == 256)
return __memset_a64fx;
- return __memset_generic;
}
if (IS_KUNPENG920 (midr))
return __memset_kunpeng;
- if ((IS_FALKOR (midr) || IS_PHECDA (midr)) && zva_size == 64)
- return __memset_falkor;
-
if (IS_EMAG (midr))
return __memset_emag;
+ if (zva_size == 64)
+ return __memset_zva64;
+
return __memset_generic;
}
diff --git a/sysdeps/aarch64/multiarch/memset_falkor.S b/sysdeps/aarch64/multiarch/memset_falkor.S
deleted file mode 100644
index c6946a8072..0000000000
--- a/sysdeps/aarch64/multiarch/memset_falkor.S
+++ /dev/null
@@ -1,54 +0,0 @@
-/* Memset for falkor.
- Copyright (C) 2017-2023 Free Software Foundation, Inc.
-
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library. If not, see
- <https://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <memset-reg.h>
-
-/* Reading dczid_el0 is expensive on falkor so move it into the ifunc
- resolver and assume ZVA size of 64 bytes. The IFUNC resolver takes care to
- use this function only when ZVA is enabled. */
-
-#if IS_IN (libc)
-.macro zva_macro
- .p2align 4
- /* Write the first and last 64 byte aligned block using stp rather
- than using DC ZVA. This is faster on some cores. */
- str q0, [dst, 16]
- stp q0, q0, [dst, 32]
- bic dst, dst, 63
- stp q0, q0, [dst, 64]
- stp q0, q0, [dst, 96]
- sub count, dstend, dst /* Count is now 128 too large. */
- sub count, count, 128+64+64 /* Adjust count and bias for loop. */
- add dst, dst, 128
-1: dc zva, dst
- add dst, dst, 64
- subs count, count, 64
- b.hi 1b
- stp q0, q0, [dst, 0]
- stp q0, q0, [dst, 32]
- stp q0, q0, [dstend, -64]
- stp q0, q0, [dstend, -32]
- ret
-.endm
-
-# define ZVA_MACRO zva_macro
-# define MEMSET __memset_falkor
-# include <sysdeps/aarch64/memset.S>
-#endif
diff --git a/sysdeps/aarch64/multiarch/memset_zva64.S b/sysdeps/aarch64/multiarch/memset_zva64.S
new file mode 100644
index 0000000000..13f45fd3d8
--- /dev/null
+++ b/sysdeps/aarch64/multiarch/memset_zva64.S
@@ -0,0 +1,27 @@
+/* Optimized memset for zva size = 64.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+#define ZVA64_ONLY 1
+#define MEMSET __memset_zva64
+#undef libc_hidden_builtin_def
+#define libc_hidden_builtin_def(X)
+
+#include "../memset.S"
--
2.44.0