/
ccompiler_opt.py
2588 lines (2326 loc) · 94.9 KB
/
ccompiler_opt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
"""Provides the `CCompilerOpt` class, used for handling the CPU/hardware
optimization, starting from parsing the command arguments, to managing the
relation between the CPU baseline and dispatch-able features,
also generating the required C headers and ending with compiling
the sources with proper compiler's flags.
`CCompilerOpt` doesn't provide runtime detection for the CPU features,
instead only focuses on the compiler side, but it creates abstract C headers
that can be used later for the final runtime dispatching process."""
import atexit
import inspect
import os
import pprint
import re
import subprocess
import textwrap
# Compiler flags applied to every C++ source built within NumPy.
# The set is kept deliberately small so that the resulting objects
# carry very few runtime dependencies.
NPY_CXX_FLAGS = [
    "-std=c++11",            # minimal standard version
    "-D__STDC_VERSION__=0",  # for compatibility with C headers
    "-fno-exceptions",       # no exception support
    "-fno-rtti",             # no runtime type information
]
class _Config:
"""An abstract class holds all configurable attributes of `CCompilerOpt`,
these class attributes can be used to change the default behavior
of `CCompilerOpt` in order to fit other requirements.
Attributes
----------
conf_nocache : bool
Set True to disable memory and file cache.
Default is False.
conf_noopt : bool
Set True to forces the optimization to be disabled,
in this case `CCompilerOpt` tends to generate all
expected headers in order to 'not' break the build.
Default is False.
conf_cache_factors : list
Add extra factors to the primary caching factors. The caching factors
are utilized to determine if there are changes had happened that
requires to discard the cache and re-updating it. The primary factors
are the arguments of `CCompilerOpt` and `CCompiler`'s properties(type, flags, etc).
Default is list of two items, containing the time of last modification
of `ccompiler_opt` and value of attribute "conf_noopt"
conf_tmp_path : str,
The path of temporary directory. Default is auto-created
temporary directory via ``tempfile.mkdtemp()``.
conf_check_path : str
The path of testing files. Each added CPU feature must have a
**C** source file contains at least one intrinsic or instruction that
related to this feature, so it can be tested against the compiler.
Default is ``./distutils/checks``.
conf_target_groups : dict
Extra tokens that can be reached from dispatch-able sources through
the special mark ``@targets``. Default is an empty dictionary.
**Notes**:
- case-insensitive for tokens and group names
- sign '#' must stick in the begin of group name and only within ``@targets``
**Example**:
.. code-block:: console
$ "@targets #avx_group other_tokens" > group_inside.c
>>> CCompilerOpt.conf_target_groups["avx_group"] = \\
"$werror $maxopt avx2 avx512f avx512_skx"
>>> cco = CCompilerOpt(cc_instance)
>>> cco.try_dispatch(["group_inside.c"])
conf_c_prefix : str
The prefix of public C definitions. Default is ``"NPY_"``.
conf_c_prefix_ : str
The prefix of internal C definitions. Default is ``"NPY__"``.
conf_cc_flags : dict
Nested dictionaries defining several compiler flags
that linked to some major functions, the main key
represent the compiler name and sub-keys represent
flags names. Default is already covers all supported
**C** compilers.
Sub-keys explained as follows:
"native": str or None
used by argument option `native`, to detect the current
machine support via the compiler.
"werror": str or None
utilized to treat warning as errors during testing CPU features
against the compiler and also for target's policy `$werror`
via dispatch-able sources.
"maxopt": str or None
utilized for target's policy '$maxopt' and the value should
contains the maximum acceptable optimization by the compiler.
e.g. in gcc `'-O3'`
**Notes**:
* case-sensitive for compiler names and flags
* use space to separate multiple flags
* any flag will tested against the compiler and it will skipped
if it's not applicable.
conf_min_features : dict
A dictionary defines the used CPU features for
argument option `'min'`, the key represent the CPU architecture
name e.g. `'x86'`. Default values provide the best effort
on wide range of users platforms.
**Note**: case-sensitive for architecture names.
conf_features : dict
Nested dictionaries used for identifying the CPU features.
the primary key is represented as a feature name or group name
that gathers several features. Default values covers all
supported features but without the major options like "flags",
these undefined options handle it by method `conf_features_partial()`.
Default value is covers almost all CPU features for *X86*, *IBM/Power64*
and *ARM 7/8*.
Sub-keys explained as follows:
"implies" : str or list, optional,
List of CPU feature names to be implied by it,
the feature name must be defined within `conf_features`.
Default is None.
"flags": str or list, optional
List of compiler flags. Default is None.
"detect": str or list, optional
List of CPU feature names that required to be detected
in runtime. By default, its the feature name or features
in "group" if its specified.
"implies_detect": bool, optional
If True, all "detect" of implied features will be combined.
Default is True. see `feature_detect()`.
"group": str or list, optional
Same as "implies" but doesn't require the feature name to be
defined within `conf_features`.
"interest": int, required
a key for sorting CPU features
"headers": str or list, optional
intrinsics C header file
"disable": str, optional
force disable feature, the string value should contains the
reason of disabling.
"autovec": bool or None, optional
True or False to declare that CPU feature can be auto-vectorized
by the compiler.
By default(None), treated as True if the feature contains at
least one applicable flag. see `feature_can_autovec()`
"extra_checks": str or list, optional
Extra test case names for the CPU feature that need to be tested
against the compiler.
Each test case must have a C file named ``extra_xxxx.c``, where
``xxxx`` is the case name in lower case, under 'conf_check_path'.
It should contain at least one intrinsic or function related to the test case.
If the compiler able to successfully compile the C file then `CCompilerOpt`
will add a C ``#define`` for it into the main dispatch header, e.g.
```#define {conf_c_prefix}_XXXX`` where ``XXXX`` is the case name in upper case.
**NOTES**:
* space can be used as separator with options that supports "str or list"
* case-sensitive for all values and feature name must be in upper-case.
* if flags aren't applicable, its will skipped rather than disable the
CPU feature
* the CPU feature will disabled if the compiler fail to compile
the test file
"""
conf_nocache = False
conf_noopt = False
conf_cache_factors = None
conf_tmp_path = None
conf_check_path = os.path.join(
os.path.dirname(os.path.realpath(__file__)), "checks"
)
conf_target_groups = {}
conf_c_prefix = 'NPY_'
conf_c_prefix_ = 'NPY__'
conf_cc_flags = dict(
gcc = dict(
# native should always fail on arm and ppc64,
# native usually works only with x86
native = '-march=native',
opt = '-O3',
werror = '-Werror',
),
clang = dict(
native = '-march=native',
opt = "-O3",
# One of the following flags needs to be applicable for Clang to
# guarantee the sanity of the testing process, however in certain
# cases `-Werror` gets skipped during the availability test due to
# "unused arguments" warnings.
# see https://github.com/numpy/numpy/issues/19624
werror = '-Werror=switch -Werror',
),
icc = dict(
native = '-xHost',
opt = '-O3',
werror = '-Werror',
),
iccw = dict(
native = '/QxHost',
opt = '/O3',
werror = '/Werror',
),
msvc = dict(
native = None,
opt = '/O2',
werror = '/WX',
)
)
conf_min_features = dict(
x86 = "SSE SSE2",
x64 = "SSE SSE2 SSE3",
ppc64 = '', # play it safe
ppc64le = "VSX VSX2",
armhf = '', # play it safe
aarch64 = "NEON NEON_FP16 NEON_VFPV4 ASIMD"
)
conf_features = dict(
# X86
SSE = dict(
interest=1, headers="xmmintrin.h",
# enabling SSE without SSE2 is useless also
# it's non-optional for x86_64
implies="SSE2"
),
SSE2 = dict(interest=2, implies="SSE", headers="emmintrin.h"),
SSE3 = dict(interest=3, implies="SSE2", headers="pmmintrin.h"),
SSSE3 = dict(interest=4, implies="SSE3", headers="tmmintrin.h"),
SSE41 = dict(interest=5, implies="SSSE3", headers="smmintrin.h"),
POPCNT = dict(interest=6, implies="SSE41", headers="popcntintrin.h"),
SSE42 = dict(interest=7, implies="POPCNT"),
AVX = dict(
interest=8, implies="SSE42", headers="immintrin.h",
implies_detect=False
),
XOP = dict(interest=9, implies="AVX", headers="x86intrin.h"),
FMA4 = dict(interest=10, implies="AVX", headers="x86intrin.h"),
F16C = dict(interest=11, implies="AVX"),
FMA3 = dict(interest=12, implies="F16C"),
AVX2 = dict(interest=13, implies="F16C"),
AVX512F = dict(
interest=20, implies="FMA3 AVX2", implies_detect=False,
extra_checks="AVX512F_REDUCE"
),
AVX512CD = dict(interest=21, implies="AVX512F"),
AVX512_KNL = dict(
interest=40, implies="AVX512CD", group="AVX512ER AVX512PF",
detect="AVX512_KNL", implies_detect=False
),
AVX512_KNM = dict(
interest=41, implies="AVX512_KNL",
group="AVX5124FMAPS AVX5124VNNIW AVX512VPOPCNTDQ",
detect="AVX512_KNM", implies_detect=False
),
AVX512_SKX = dict(
interest=42, implies="AVX512CD", group="AVX512VL AVX512BW AVX512DQ",
detect="AVX512_SKX", implies_detect=False,
extra_checks="AVX512BW_MASK AVX512DQ_MASK"
),
AVX512_CLX = dict(
interest=43, implies="AVX512_SKX", group="AVX512VNNI",
detect="AVX512_CLX"
),
AVX512_CNL = dict(
interest=44, implies="AVX512_SKX", group="AVX512IFMA AVX512VBMI",
detect="AVX512_CNL", implies_detect=False
),
AVX512_ICL = dict(
interest=45, implies="AVX512_CLX AVX512_CNL",
group="AVX512VBMI2 AVX512BITALG AVX512VPOPCNTDQ",
detect="AVX512_ICL", implies_detect=False
),
# IBM/Power
## Power7/ISA 2.06
VSX = dict(interest=1, headers="altivec.h", extra_checks="VSX_ASM"),
## Power8/ISA 2.07
VSX2 = dict(interest=2, implies="VSX", implies_detect=False),
## Power9/ISA 3.00
VSX3 = dict(interest=3, implies="VSX2", implies_detect=False),
# ARM
NEON = dict(interest=1, headers="arm_neon.h"),
NEON_FP16 = dict(interest=2, implies="NEON"),
## FMA
NEON_VFPV4 = dict(interest=3, implies="NEON_FP16"),
## Advanced SIMD
ASIMD = dict(interest=4, implies="NEON_FP16 NEON_VFPV4", implies_detect=False),
## ARMv8.2 half-precision & vector arithm
ASIMDHP = dict(interest=5, implies="ASIMD"),
## ARMv8.2 dot product
ASIMDDP = dict(interest=6, implies="ASIMD"),
## ARMv8.2 Single & half-precision Multiply
ASIMDFHM = dict(interest=7, implies="ASIMDHP"),
)
def conf_features_partial(self):
"""Return a dictionary of supported CPU features by the platform,
and accumulate the rest of undefined options in `conf_features`,
the returned dict has same rules and notes in
class attribute `conf_features`, also its override
any options that been set in 'conf_features'.
"""
if self.cc_noopt:
# optimization is disabled
return {}
on_x86 = self.cc_on_x86 or self.cc_on_x64
is_unix = self.cc_is_gcc or self.cc_is_clang
if on_x86 and is_unix: return dict(
SSE = dict(flags="-msse"),
SSE2 = dict(flags="-msse2"),
SSE3 = dict(flags="-msse3"),
SSSE3 = dict(flags="-mssse3"),
SSE41 = dict(flags="-msse4.1"),
POPCNT = dict(flags="-mpopcnt"),
SSE42 = dict(flags="-msse4.2"),
AVX = dict(flags="-mavx"),
F16C = dict(flags="-mf16c"),
XOP = dict(flags="-mxop"),
FMA4 = dict(flags="-mfma4"),
FMA3 = dict(flags="-mfma"),
AVX2 = dict(flags="-mavx2"),
AVX512F = dict(flags="-mavx512f"),
AVX512CD = dict(flags="-mavx512cd"),
AVX512_KNL = dict(flags="-mavx512er -mavx512pf"),
AVX512_KNM = dict(
flags="-mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq"
),
AVX512_SKX = dict(flags="-mavx512vl -mavx512bw -mavx512dq"),
AVX512_CLX = dict(flags="-mavx512vnni"),
AVX512_CNL = dict(flags="-mavx512ifma -mavx512vbmi"),
AVX512_ICL = dict(
flags="-mavx512vbmi2 -mavx512bitalg -mavx512vpopcntdq"
)
)
if on_x86 and self.cc_is_icc: return dict(
SSE = dict(flags="-msse"),
SSE2 = dict(flags="-msse2"),
SSE3 = dict(flags="-msse3"),
SSSE3 = dict(flags="-mssse3"),
SSE41 = dict(flags="-msse4.1"),
POPCNT = {},
SSE42 = dict(flags="-msse4.2"),
AVX = dict(flags="-mavx"),
F16C = {},
XOP = dict(disable="Intel Compiler doesn't support it"),
FMA4 = dict(disable="Intel Compiler doesn't support it"),
# Intel Compiler doesn't support AVX2 or FMA3 independently
FMA3 = dict(
implies="F16C AVX2", flags="-march=core-avx2"
),
AVX2 = dict(implies="FMA3", flags="-march=core-avx2"),
# Intel Compiler doesn't support AVX512F or AVX512CD independently
AVX512F = dict(
implies="AVX2 AVX512CD", flags="-march=common-avx512"
),
AVX512CD = dict(
implies="AVX2 AVX512F", flags="-march=common-avx512"
),
AVX512_KNL = dict(flags="-xKNL"),
AVX512_KNM = dict(flags="-xKNM"),
AVX512_SKX = dict(flags="-xSKYLAKE-AVX512"),
AVX512_CLX = dict(flags="-xCASCADELAKE"),
AVX512_CNL = dict(flags="-xCANNONLAKE"),
AVX512_ICL = dict(flags="-xICELAKE-CLIENT"),
)
if on_x86 and self.cc_is_iccw: return dict(
SSE = dict(flags="/arch:SSE"),
SSE2 = dict(flags="/arch:SSE2"),
SSE3 = dict(flags="/arch:SSE3"),
SSSE3 = dict(flags="/arch:SSSE3"),
SSE41 = dict(flags="/arch:SSE4.1"),
POPCNT = {},
SSE42 = dict(flags="/arch:SSE4.2"),
AVX = dict(flags="/arch:AVX"),
F16C = {},
XOP = dict(disable="Intel Compiler doesn't support it"),
FMA4 = dict(disable="Intel Compiler doesn't support it"),
# Intel Compiler doesn't support FMA3 or AVX2 independently
FMA3 = dict(
implies="F16C AVX2", flags="/arch:CORE-AVX2"
),
AVX2 = dict(
implies="FMA3", flags="/arch:CORE-AVX2"
),
# Intel Compiler doesn't support AVX512F or AVX512CD independently
AVX512F = dict(
implies="AVX2 AVX512CD", flags="/Qx:COMMON-AVX512"
),
AVX512CD = dict(
implies="AVX2 AVX512F", flags="/Qx:COMMON-AVX512"
),
AVX512_KNL = dict(flags="/Qx:KNL"),
AVX512_KNM = dict(flags="/Qx:KNM"),
AVX512_SKX = dict(flags="/Qx:SKYLAKE-AVX512"),
AVX512_CLX = dict(flags="/Qx:CASCADELAKE"),
AVX512_CNL = dict(flags="/Qx:CANNONLAKE"),
AVX512_ICL = dict(flags="/Qx:ICELAKE-CLIENT")
)
if on_x86 and self.cc_is_msvc: return dict(
SSE = dict(flags="/arch:SSE") if self.cc_on_x86 else {},
SSE2 = dict(flags="/arch:SSE2") if self.cc_on_x86 else {},
SSE3 = {},
SSSE3 = {},
SSE41 = {},
POPCNT = dict(headers="nmmintrin.h"),
SSE42 = {},
AVX = dict(flags="/arch:AVX"),
F16C = {},
XOP = dict(headers="ammintrin.h"),
FMA4 = dict(headers="ammintrin.h"),
# MSVC doesn't support FMA3 or AVX2 independently
FMA3 = dict(
implies="F16C AVX2", flags="/arch:AVX2"
),
AVX2 = dict(
implies="F16C FMA3", flags="/arch:AVX2"
),
# MSVC doesn't support AVX512F or AVX512CD independently,
# always generate instructions belong to (VL/VW/DQ)
AVX512F = dict(
implies="AVX2 AVX512CD AVX512_SKX", flags="/arch:AVX512"
),
AVX512CD = dict(
implies="AVX512F AVX512_SKX", flags="/arch:AVX512"
),
AVX512_KNL = dict(
disable="MSVC compiler doesn't support it"
),
AVX512_KNM = dict(
disable="MSVC compiler doesn't support it"
),
AVX512_SKX = dict(flags="/arch:AVX512"),
AVX512_CLX = {},
AVX512_CNL = {},
AVX512_ICL = {}
)
on_power = self.cc_on_ppc64le or self.cc_on_ppc64
if on_power:
partial = dict(
VSX = dict(
implies=("VSX2" if self.cc_on_ppc64le else ""),
flags="-mvsx"
),
VSX2 = dict(
flags="-mcpu=power8", implies_detect=False
),
VSX3 = dict(
flags="-mcpu=power9 -mtune=power9", implies_detect=False
)
)
if self.cc_is_clang:
partial["VSX"]["flags"] = "-maltivec -mvsx"
partial["VSX2"]["flags"] = "-mpower8-vector"
partial["VSX3"]["flags"] = "-mpower9-vector"
return partial
if self.cc_on_aarch64 and is_unix: return dict(
NEON = dict(
implies="NEON_FP16 NEON_VFPV4 ASIMD", autovec=True
),
NEON_FP16 = dict(
implies="NEON NEON_VFPV4 ASIMD", autovec=True
),
NEON_VFPV4 = dict(
implies="NEON NEON_FP16 ASIMD", autovec=True
),
ASIMD = dict(
implies="NEON NEON_FP16 NEON_VFPV4", autovec=True
),
ASIMDHP = dict(
flags="-march=armv8.2-a+fp16"
),
ASIMDDP = dict(
flags="-march=armv8.2-a+dotprod"
),
ASIMDFHM = dict(
flags="-march=armv8.2-a+fp16fml"
),
)
if self.cc_on_armhf and is_unix: return dict(
NEON = dict(
flags="-mfpu=neon"
),
NEON_FP16 = dict(
flags="-mfpu=neon-fp16 -mfp16-format=ieee"
),
NEON_VFPV4 = dict(
flags="-mfpu=neon-vfpv4",
),
ASIMD = dict(
flags="-mfpu=neon-fp-armv8 -march=armv8-a+simd",
),
ASIMDHP = dict(
flags="-march=armv8.2-a+fp16"
),
ASIMDDP = dict(
flags="-march=armv8.2-a+dotprod",
),
ASIMDFHM = dict(
flags="-march=armv8.2-a+fp16fml"
)
)
# TODO: ARM MSVC
return {}
def __init__(self):
if self.conf_tmp_path is None:
import shutil
import tempfile
tmp = tempfile.mkdtemp()
def rm_temp():
try:
shutil.rmtree(tmp)
except OSError:
pass
atexit.register(rm_temp)
self.conf_tmp_path = tmp
if self.conf_cache_factors is None:
self.conf_cache_factors = [
os.path.getmtime(__file__),
self.conf_nocache
]
class _Distutils:
"""A helper class that provides a collection of fundamental methods
implemented in a top of Python and NumPy Distutils.
The idea behind this class is to gather all methods that it may
need to override in case of reuse 'CCompilerOpt' in environment
different than of what NumPy has.
Parameters
----------
ccompiler : `CCompiler`
The generate instance that returned from `distutils.ccompiler.new_compiler()`.
"""
def __init__(self, ccompiler):
self._ccompiler = ccompiler
def dist_compile(self, sources, flags, ccompiler=None, **kwargs):
"""Wrap CCompiler.compile()"""
assert(isinstance(sources, list))
assert(isinstance(flags, list))
flags = kwargs.pop("extra_postargs", []) + flags
if not ccompiler:
ccompiler = self._ccompiler
return ccompiler.compile(sources, extra_postargs=flags, **kwargs)
def dist_test(self, source, flags, macros=[]):
"""Return True if 'CCompiler.compile()' able to compile
a source file with certain flags.
"""
assert(isinstance(source, str))
from distutils.errors import CompileError
cc = self._ccompiler;
bk_spawn = getattr(cc, 'spawn', None)
if bk_spawn:
cc_type = getattr(self._ccompiler, "compiler_type", "")
if cc_type in ("msvc",):
setattr(cc, 'spawn', self._dist_test_spawn_paths)
else:
setattr(cc, 'spawn', self._dist_test_spawn)
test = False
try:
self.dist_compile(
[source], flags, macros=macros, output_dir=self.conf_tmp_path
)
test = True
except CompileError as e:
self.dist_log(str(e), stderr=True)
if bk_spawn:
setattr(cc, 'spawn', bk_spawn)
return test
def dist_info(self):
"""
Return a tuple containing info about (platform, compiler, extra_args),
required by the abstract class '_CCompiler' for discovering the
platform environment. This is also used as a cache factor in order
to detect any changes happening from outside.
"""
if hasattr(self, "_dist_info"):
return self._dist_info
cc_type = getattr(self._ccompiler, "compiler_type", '')
if cc_type in ("intelem", "intelemw"):
platform = "x86_64"
elif cc_type in ("intel", "intelw", "intele"):
platform = "x86"
else:
from distutils.util import get_platform
platform = get_platform()
cc_info = getattr(self._ccompiler, "compiler", getattr(self._ccompiler, "compiler_so", ''))
if not cc_type or cc_type == "unix":
if hasattr(cc_info, "__iter__"):
compiler = cc_info[0]
else:
compiler = str(cc_info)
else:
compiler = cc_type
if hasattr(cc_info, "__iter__") and len(cc_info) > 1:
extra_args = ' '.join(cc_info[1:])
else:
extra_args = os.environ.get("CFLAGS", "")
extra_args += os.environ.get("CPPFLAGS", "")
self._dist_info = (platform, compiler, extra_args)
return self._dist_info
@staticmethod
def dist_error(*args):
"""Raise a compiler error"""
from distutils.errors import CompileError
raise CompileError(_Distutils._dist_str(*args))
@staticmethod
def dist_fatal(*args):
"""Raise a distutils error"""
from distutils.errors import DistutilsError
raise DistutilsError(_Distutils._dist_str(*args))
@staticmethod
def dist_log(*args, stderr=False):
"""Print a console message"""
from numpy.distutils import log
out = _Distutils._dist_str(*args)
if stderr:
log.warn(out)
else:
log.info(out)
@staticmethod
def dist_load_module(name, path):
"""Load a module from file, required by the abstract class '_Cache'."""
from .misc_util import exec_mod_from_location
try:
return exec_mod_from_location(name, path)
except Exception as e:
_Distutils.dist_log(e, stderr=True)
return None
@staticmethod
def _dist_str(*args):
"""Return a string to print by log and errors."""
def to_str(arg):
if not isinstance(arg, str) and hasattr(arg, '__iter__'):
ret = []
for a in arg:
ret.append(to_str(a))
return '('+ ' '.join(ret) + ')'
return str(arg)
stack = inspect.stack()[2]
start = "CCompilerOpt.%s[%d] : " % (stack.function, stack.lineno)
out = ' '.join([
to_str(a)
for a in (*args,)
])
return start + out
def _dist_test_spawn_paths(self, cmd, display=None):
"""
Fix msvc SDK ENV path same as distutils do
without it we get c1: fatal error C1356: unable to find mspdbcore.dll
"""
if not hasattr(self._ccompiler, "_paths"):
self._dist_test_spawn(cmd)
return
old_path = os.getenv("path")
try:
os.environ["path"] = self._ccompiler._paths
self._dist_test_spawn(cmd)
finally:
os.environ["path"] = old_path
_dist_warn_regex = re.compile(
# intel and msvc compilers don't raise
# fatal errors when flags are wrong or unsupported
".*("
"warning D9002|" # msvc, it should be work with any language.
"invalid argument for option" # intel
").*"
)
@staticmethod
def _dist_test_spawn(cmd, display=None):
try:
o = subprocess.check_output(cmd, stderr=subprocess.STDOUT,
universal_newlines=True)
if o and re.match(_Distutils._dist_warn_regex, o):
_Distutils.dist_error(
"Flags in command", cmd ,"aren't supported by the compiler"
", output -> \n%s" % o
)
except subprocess.CalledProcessError as exc:
o = exc.output
s = exc.returncode
except OSError as e:
o = e
s = 127
else:
return None
_Distutils.dist_error(
"Command", cmd, "failed with exit status %d output -> \n%s" % (
s, o
))
_share_cache = {}
class _Cache:
"""An abstract class handles caching functionality, provides two
levels of caching, in-memory by share instances attributes among
each other and by store attributes into files.
**Note**:
any attributes that start with ``_`` or ``conf_`` will be ignored.
Parameters
----------
cache_path: str or None
The path of cache file, if None then cache in file will disabled.
*factors:
The caching factors that need to utilize next to `conf_cache_factors`.
Attributes
----------
cache_private: set
Hold the attributes that need be skipped from "in-memory cache".
cache_infile: bool
Utilized during initializing this class, to determine if the cache was able
to loaded from the specified cache path in 'cache_path'.
"""
# skip attributes from cache
_cache_ignore = re.compile("^(_|conf_)")
def __init__(self, cache_path=None, *factors):
self.cache_me = {}
self.cache_private = set()
self.cache_infile = False
self._cache_path = None
if self.conf_nocache:
self.dist_log("cache is disabled by `Config`")
return
self._cache_hash = self.cache_hash(*factors, *self.conf_cache_factors)
self._cache_path = cache_path
if cache_path:
if os.path.exists(cache_path):
self.dist_log("load cache from file ->", cache_path)
cache_mod = self.dist_load_module("cache", cache_path)
if not cache_mod:
self.dist_log(
"unable to load the cache file as a module",
stderr=True
)
elif not hasattr(cache_mod, "hash") or \
not hasattr(cache_mod, "data"):
self.dist_log("invalid cache file", stderr=True)
elif self._cache_hash == cache_mod.hash:
self.dist_log("hit the file cache")
for attr, val in cache_mod.data.items():
setattr(self, attr, val)
self.cache_infile = True
else:
self.dist_log("miss the file cache")
if not self.cache_infile:
other_cache = _share_cache.get(self._cache_hash)
if other_cache:
self.dist_log("hit the memory cache")
for attr, val in other_cache.__dict__.items():
if attr in other_cache.cache_private or \
re.match(self._cache_ignore, attr):
continue
setattr(self, attr, val)
_share_cache[self._cache_hash] = self
atexit.register(self.cache_flush)
def __del__(self):
for h, o in _share_cache.items():
if o == self:
_share_cache.pop(h)
break
def cache_flush(self):
"""
Force update the cache.
"""
if not self._cache_path:
return
# TODO: don't write if the cache doesn't change
self.dist_log("write cache to path ->", self._cache_path)
cdict = self.__dict__.copy()
for attr in self.__dict__.keys():
if re.match(self._cache_ignore, attr):
cdict.pop(attr)
d = os.path.dirname(self._cache_path)
if not os.path.exists(d):
os.makedirs(d)
repr_dict = pprint.pformat(cdict, compact=True)
with open(self._cache_path, "w") as f:
f.write(textwrap.dedent("""\
# AUTOGENERATED DON'T EDIT
# Please make changes to the code generator \
(distutils/ccompiler_opt.py)
hash = {}
data = \\
""").format(self._cache_hash))
f.write(repr_dict)
def cache_hash(self, *factors):
# is there a built-in non-crypto hash?
# sdbm
chash = 0
for f in factors:
for char in str(f):
chash = ord(char) + (chash << 6) + (chash << 16) - chash
chash &= 0xFFFFFFFF
return chash
@staticmethod
def me(cb):
"""
A static method that can be treated as a decorator to
dynamically cache certain methods.
"""
def cache_wrap_me(self, *args, **kwargs):
# good for normal args
cache_key = str((
cb.__name__, *args, *kwargs.keys(), *kwargs.values()
))
if cache_key in self.cache_me:
return self.cache_me[cache_key]
ccb = cb(self, *args, **kwargs)
self.cache_me[cache_key] = ccb
return ccb
return cache_wrap_me
class _CCompiler:
"""A helper class for `CCompilerOpt` containing all utilities that
related to the fundamental compiler's functions.
Attributes
----------
cc_on_x86 : bool
True when the target architecture is 32-bit x86
cc_on_x64 : bool
True when the target architecture is 64-bit x86
cc_on_ppc64 : bool
True when the target architecture is 64-bit big-endian PowerPC
cc_on_armhf : bool
True when the target architecture is 32-bit ARMv7+
cc_on_aarch64 : bool
True when the target architecture is 64-bit Armv8-a+
cc_on_noarch : bool
True when the target architecture is unknown or not supported
cc_is_gcc : bool
True if the compiler is GNU or
if the compiler is unknown
cc_is_clang : bool
True if the compiler is Clang
cc_is_icc : bool
True if the compiler is Intel compiler (unix like)
cc_is_iccw : bool
True if the compiler is Intel compiler (msvc like)
cc_is_nocc : bool
True if the compiler isn't supported directly,
Note: that cause a fail-back to gcc
cc_has_debug : bool
True if the compiler has debug flags
cc_has_native : bool
True if the compiler has native flags
cc_noopt : bool
True if the compiler has definition 'DISABLE_OPT*',
or 'cc_on_noarch' is True
cc_march : str
The target architecture name, or "unknown" if
the architecture isn't supported
cc_name : str
The compiler name, or "unknown" if the compiler isn't supported
cc_flags : dict
Dictionary containing the initialized flags of `_Config.conf_cc_flags`
"""
def __init__(self):
if hasattr(self, "cc_is_cached"):
return
# attr regex
detect_arch = (
("cc_on_x64", ".*(x|x86_|amd)64.*"),
("cc_on_x86", ".*(win32|x86|i386|i686).*"),
("cc_on_ppc64le", ".*(powerpc|ppc)64(el|le).*"),
("cc_on_ppc64", ".*(powerpc|ppc)64.*"),
("cc_on_aarch64", ".*(aarch64|arm64).*"),
("cc_on_armhf", ".*arm.*"),
# undefined platform
("cc_on_noarch", ""),
)
detect_compiler = (
("cc_is_gcc", r".*(gcc|gnu\-g).*"),
("cc_is_clang", ".*clang.*"),
("cc_is_iccw", ".*(intelw|intelemw|iccw).*"), # intel msvc like
("cc_is_icc", ".*(intel|icc).*"), # intel unix like
("cc_is_msvc", ".*msvc.*"),
# undefined compiler will be treat it as gcc
("cc_is_nocc", ""),
)
detect_args = (
("cc_has_debug", ".*(O0|Od|ggdb|coverage|debug:full).*"),
("cc_has_native", ".*(-march=native|-xHost|/QxHost).*"),
# in case if the class run with -DNPY_DISABLE_OPTIMIZATION
("cc_noopt", ".*DISABLE_OPT.*"),
)
dist_info = self.dist_info()
platform, compiler_info, extra_args = dist_info
# set False to all attrs
for section in (detect_arch, detect_compiler, detect_args):
for attr, rgex in section:
setattr(self, attr, False)
for detect, searchin in ((detect_arch, platform), (detect_compiler, compiler_info)):
for attr, rgex in detect:
if rgex and not re.match(rgex, searchin, re.IGNORECASE):
continue
setattr(self, attr, True)
break
for attr, rgex in detect_args:
if rgex and not re.match(rgex, extra_args, re.IGNORECASE):
continue
setattr(self, attr, True)
if self.cc_on_noarch:
self.dist_log(
"unable to detect CPU architecture which lead to disable the optimization. "
f"check dist_info:<<\n{dist_info}\n>>",
stderr=True
)
self.cc_noopt = True
if self.conf_noopt:
self.dist_log("Optimization is disabled by the Config", stderr=True)
self.cc_noopt = True
if self.cc_is_nocc:
"""
mingw can be treated as a gcc, and also xlc even if it based on clang,
but still has the same gcc optimization flags.
"""
self.dist_log(
"unable to detect compiler type which leads to treating it as GCC. "
"this is a normal behavior if you're using gcc-like compiler such as MinGW or IBM/XLC."
f"check dist_info:<<\n{dist_info}\n>>",
stderr=True
)
self.cc_is_gcc = True
self.cc_march = "unknown"
for arch in ("x86", "x64", "ppc64", "ppc64le", "armhf", "aarch64"):
if getattr(self, "cc_on_" + arch):
self.cc_march = arch
break
self.cc_name = "unknown"
for name in ("gcc", "clang", "iccw", "icc", "msvc"):