-
Notifications
You must be signed in to change notification settings - Fork 493
/
0057-AArch64-Add-support-for-MOPS-memcpy-memmove-memset.patch
314 lines (302 loc) · 11.4 KB
/
0057-AArch64-Add-support-for-MOPS-memcpy-memmove-memset.patch
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
From d8a2b56b4fdf39488eb8a94f8b1064e262708b6f Mon Sep 17 00:00:00 2001
From: Wilco Dijkstra <wilco.dijkstra@arm.com>
Date: Tue, 17 Oct 2023 16:54:21 +0100
Subject: [PATCH] AArch64: Add support for MOPS memcpy/memmove/memset
Add support for MOPS in cpu_features and INIT_ARCH. Add ifuncs using MOPS for
memcpy, memmove and memset (use .inst for now so it works with all binutils
versions without needing complex configure and conditional compilation).
Reviewed-by: Szabolcs Nagy <szabolcs.nagy@arm.com>
(cherry picked from commit 2bd00179885928fd95fcabfafc50e7b5c6e660d2)
---
sysdeps/aarch64/multiarch/Makefile | 3 ++
sysdeps/aarch64/multiarch/ifunc-impl-list.c | 3 ++
sysdeps/aarch64/multiarch/init-arch.h | 4 +-
sysdeps/aarch64/multiarch/memcpy.c | 4 ++
sysdeps/aarch64/multiarch/memcpy_mops.S | 39 +++++++++++++++++++
sysdeps/aarch64/multiarch/memmove.c | 4 ++
sysdeps/aarch64/multiarch/memmove_mops.S | 39 +++++++++++++++++++
sysdeps/aarch64/multiarch/memset.c | 4 ++
sysdeps/aarch64/multiarch/memset_mops.S | 38 ++++++++++++++++++
.../unix/sysv/linux/aarch64/cpu-features.c | 3 ++
.../unix/sysv/linux/aarch64/cpu-features.h | 1 +
11 files changed, 141 insertions(+), 1 deletion(-)
create mode 100644 sysdeps/aarch64/multiarch/memcpy_mops.S
create mode 100644 sysdeps/aarch64/multiarch/memmove_mops.S
create mode 100644 sysdeps/aarch64/multiarch/memset_mops.S
diff --git a/sysdeps/aarch64/multiarch/Makefile b/sysdeps/aarch64/multiarch/Makefile
index 223777d94e..e6099548b9 100644
--- a/sysdeps/aarch64/multiarch/Makefile
+++ b/sysdeps/aarch64/multiarch/Makefile
@@ -5,14 +5,17 @@ sysdep_routines += \
memcpy_a64fx \
memcpy_falkor \
memcpy_generic \
+ memcpy_mops \
memcpy_sve \
memcpy_thunderx \
memcpy_thunderx2 \
+ memmove_mops \
memset_a64fx \
memset_emag \
memset_falkor \
memset_generic \
memset_kunpeng \
+ memset_mops \
strlen_asimd \
strlen_mte \
# sysdep_routines
diff --git a/sysdeps/aarch64/multiarch/ifunc-impl-list.c b/sysdeps/aarch64/multiarch/ifunc-impl-list.c
index d274f01fdb..da7f115377 100644
--- a/sysdeps/aarch64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/aarch64/multiarch/ifunc-impl-list.c
@@ -41,6 +41,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, memcpy, sve, __memcpy_a64fx)
IFUNC_IMPL_ADD (array, i, memcpy, sve, __memcpy_sve)
#endif
+ IFUNC_IMPL_ADD (array, i, memcpy, mops, __memcpy_mops)
IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_generic))
IFUNC_IMPL (i, name, memmove,
IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_thunderx)
@@ -50,6 +51,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, memmove, sve, __memmove_a64fx)
IFUNC_IMPL_ADD (array, i, memmove, sve, __memmove_sve)
#endif
+ IFUNC_IMPL_ADD (array, i, memmove, mops, __memmove_mops)
IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_generic))
IFUNC_IMPL (i, name, memset,
/* Enable this on non-falkor processors too so that other cores
@@ -60,6 +62,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
#if HAVE_AARCH64_SVE_ASM
IFUNC_IMPL_ADD (array, i, memset, sve && zva_size == 256, __memset_a64fx)
#endif
+ IFUNC_IMPL_ADD (array, i, memset, mops, __memset_mops)
IFUNC_IMPL_ADD (array, i, memset, 1, __memset_generic))
IFUNC_IMPL (i, name, memchr,
IFUNC_IMPL_ADD (array, i, memchr, !mte, __memchr_nosimd)
diff --git a/sysdeps/aarch64/multiarch/init-arch.h b/sysdeps/aarch64/multiarch/init-arch.h
index 6de081e381..e23e6ff290 100644
--- a/sysdeps/aarch64/multiarch/init-arch.h
+++ b/sysdeps/aarch64/multiarch/init-arch.h
@@ -35,4 +35,6 @@
bool __attribute__((unused)) mte = \
MTE_ENABLED (); \
bool __attribute__((unused)) sve = \
- GLRO(dl_aarch64_cpu_features).sve;
+ GLRO(dl_aarch64_cpu_features).sve; \
+ bool __attribute__((unused)) mops = \
+ GLRO(dl_aarch64_cpu_features).mops;
diff --git a/sysdeps/aarch64/multiarch/memcpy.c b/sysdeps/aarch64/multiarch/memcpy.c
index 3aae915c5f..9aace954cb 100644
--- a/sysdeps/aarch64/multiarch/memcpy.c
+++ b/sysdeps/aarch64/multiarch/memcpy.c
@@ -34,12 +34,16 @@ extern __typeof (__redirect_memcpy) __memcpy_thunderx2 attribute_hidden;
extern __typeof (__redirect_memcpy) __memcpy_falkor attribute_hidden;
extern __typeof (__redirect_memcpy) __memcpy_a64fx attribute_hidden;
extern __typeof (__redirect_memcpy) __memcpy_sve attribute_hidden;
+extern __typeof (__redirect_memcpy) __memcpy_mops attribute_hidden;
static inline __typeof (__redirect_memcpy) *
select_memcpy_ifunc (void)
{
INIT_ARCH ();
+ if (mops)
+ return __memcpy_mops;
+
if (sve && HAVE_AARCH64_SVE_ASM)
{
if (IS_A64FX (midr))
diff --git a/sysdeps/aarch64/multiarch/memcpy_mops.S b/sysdeps/aarch64/multiarch/memcpy_mops.S
new file mode 100644
index 0000000000..4685629664
--- /dev/null
+++ b/sysdeps/aarch64/multiarch/memcpy_mops.S
@@ -0,0 +1,39 @@
+/* Optimized memcpy for MOPS.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+/* Assumptions:
+ *
+ * AArch64, MOPS.
+ *
+ */
+
+ENTRY (__memcpy_mops)
+ PTR_ARG (0)
+ PTR_ARG (1)
+ SIZE_ARG (2)
+
+ mov x3, x0
+ .inst 0x19010443 /* cpyfp [x3]!, [x1]!, x2! */
+ .inst 0x19410443 /* cpyfm [x3]!, [x1]!, x2! */
+ .inst 0x19810443 /* cpyfe [x3]!, [x1]!, x2! */
+ ret
+
+END (__memcpy_mops)
diff --git a/sysdeps/aarch64/multiarch/memmove.c b/sysdeps/aarch64/multiarch/memmove.c
index 312f90f111..fd346e7b73 100644
--- a/sysdeps/aarch64/multiarch/memmove.c
+++ b/sysdeps/aarch64/multiarch/memmove.c
@@ -34,12 +34,16 @@ extern __typeof (__redirect_memmove) __memmove_thunderx2 attribute_hidden;
extern __typeof (__redirect_memmove) __memmove_falkor attribute_hidden;
extern __typeof (__redirect_memmove) __memmove_a64fx attribute_hidden;
extern __typeof (__redirect_memmove) __memmove_sve attribute_hidden;
+extern __typeof (__redirect_memmove) __memmove_mops attribute_hidden;
static inline __typeof (__redirect_memmove) *
select_memmove_ifunc (void)
{
INIT_ARCH ();
+ if (mops)
+ return __memmove_mops;
+
if (sve && HAVE_AARCH64_SVE_ASM)
{
if (IS_A64FX (midr))
diff --git a/sysdeps/aarch64/multiarch/memmove_mops.S b/sysdeps/aarch64/multiarch/memmove_mops.S
new file mode 100644
index 0000000000..c5ea66be3a
--- /dev/null
+++ b/sysdeps/aarch64/multiarch/memmove_mops.S
@@ -0,0 +1,39 @@
+/* Optimized memmove for MOPS.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+/* Assumptions:
+ *
+ * AArch64, MOPS.
+ *
+ */
+
+ENTRY (__memmove_mops)
+ PTR_ARG (0)
+ PTR_ARG (1)
+ SIZE_ARG (2)
+
+ mov x3, x0
+ .inst 0x1d010443 /* cpyp [x3]!, [x1]!, x2! */
+ .inst 0x1d410443 /* cpym [x3]!, [x1]!, x2! */
+ .inst 0x1d810443 /* cpye [x3]!, [x1]!, x2! */
+ ret
+
+END (__memmove_mops)
diff --git a/sysdeps/aarch64/multiarch/memset.c b/sysdeps/aarch64/multiarch/memset.c
index f9c81d3d8e..23fc66e158 100644
--- a/sysdeps/aarch64/multiarch/memset.c
+++ b/sysdeps/aarch64/multiarch/memset.c
@@ -33,12 +33,16 @@ extern __typeof (__redirect_memset) __memset_emag attribute_hidden;
extern __typeof (__redirect_memset) __memset_kunpeng attribute_hidden;
extern __typeof (__redirect_memset) __memset_a64fx attribute_hidden;
extern __typeof (__redirect_memset) __memset_generic attribute_hidden;
+extern __typeof (__redirect_memset) __memset_mops attribute_hidden;
static inline __typeof (__redirect_memset) *
select_memset_ifunc (void)
{
INIT_ARCH ();
+ if (mops)
+ return __memset_mops;
+
if (sve && HAVE_AARCH64_SVE_ASM)
{
if (IS_A64FX (midr) && zva_size == 256)
diff --git a/sysdeps/aarch64/multiarch/memset_mops.S b/sysdeps/aarch64/multiarch/memset_mops.S
new file mode 100644
index 0000000000..ca820b8636
--- /dev/null
+++ b/sysdeps/aarch64/multiarch/memset_mops.S
@@ -0,0 +1,38 @@
+/* Optimized memset for MOPS.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+/* Assumptions:
+ *
+ * AArch64, MOPS.
+ *
+ */
+
+ENTRY (__memset_mops)
+ PTR_ARG (0)
+ SIZE_ARG (2)
+
+ mov x3, x0
+ .inst 0x19c10443 /* setp [x3]!, x2!, x1 */
+ .inst 0x19c14443 /* setm [x3]!, x2!, x1 */
+ .inst 0x19c18443 /* sete [x3]!, x2!, x1 */
+ ret
+
+END (__memset_mops)
diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
index dc09c1c827..233d5b2407 100644
--- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
+++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
@@ -120,4 +120,7 @@ init_cpu_features (struct cpu_features *cpu_features)
/* Check if SVE is supported. */
cpu_features->sve = GLRO (dl_hwcap) & HWCAP_SVE;
+
+ /* Check if MOPS is supported. */
+ cpu_features->mops = GLRO (dl_hwcap2) & HWCAP2_MOPS;
}
diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.h b/sysdeps/unix/sysv/linux/aarch64/cpu-features.h
index d67d286b53..40b709677d 100644
--- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.h
+++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.h
@@ -76,6 +76,7 @@ struct cpu_features
/* Currently, the GLIBC memory tagging tunable only defines 8 bits. */
uint8_t mte_state;
bool sve;
+ bool mops;
};
#endif /* _CPU_FEATURES_AARCH64_H */
--
2.44.0