Skip to content

Commit 5749d39

Browse files
authored
Merge pull request #22165 from Developer-Ecosystem-Engineering/simd_isnan_isinf_isfinite_signbit
ENH: Implement SIMD versions of isnan, isinf, isfinite and signbit
2 parents 6615d6b + 37b8a53 commit 5749d39

File tree

10 files changed: +657 -309 lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -216,6 +216,7 @@ numpy/core/src/_simd/_simd_inc.h
216216
# umath module
217217
numpy/core/src/umath/loops_unary.dispatch.c
218218
numpy/core/src/umath/loops_unary_fp.dispatch.c
219+
numpy/core/src/umath/loops_unary_fp_le.dispatch.c
219220
numpy/core/src/umath/loops_arithm_fp.dispatch.c
220221
numpy/core/src/umath/loops_arithmetic.dispatch.c
221222
numpy/core/src/umath/loops_logical.dispatch.c

numpy/core/code_generators/generate_umath.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -962,7 +962,7 @@ def english_upper(s):
962962
Ufunc(1, 1, None,
963963
docstrings.get('numpy.core.umath.isnan'),
964964
'PyUFunc_IsFiniteTypeResolver',
965-
TD(noobj, simd=[('avx512_skx', 'fd')], out='?'),
965+
TD(noobj, out='?', dispatch=[('loops_unary_fp_le', inexactvec)]),
966966
),
967967
'isnat':
968968
Ufunc(1, 1, None,
@@ -974,19 +974,19 @@ def english_upper(s):
974974
Ufunc(1, 1, None,
975975
docstrings.get('numpy.core.umath.isinf'),
976976
'PyUFunc_IsFiniteTypeResolver',
977-
TD(noobj, simd=[('avx512_skx', 'fd')], out='?'),
977+
TD(noobj, out='?', dispatch=[('loops_unary_fp_le', inexactvec)]),
978978
),
979979
'isfinite':
980980
Ufunc(1, 1, None,
981981
docstrings.get('numpy.core.umath.isfinite'),
982982
'PyUFunc_IsFiniteTypeResolver',
983-
TD(noobj, simd=[('avx512_skx', 'fd')], out='?'),
983+
TD(noobj, out='?', dispatch=[('loops_unary_fp_le', inexactvec)]),
984984
),
985985
'signbit':
986986
Ufunc(1, 1, None,
987987
docstrings.get('numpy.core.umath.signbit'),
988988
None,
989-
TD(flts, simd=[('avx512_skx', 'fd')], out='?'),
989+
TD(flts, out='?', dispatch=[('loops_unary_fp_le', inexactvec)]),
990990
),
991991
'copysign':
992992
Ufunc(2, 1, None,

numpy/core/meson.build

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -758,6 +758,7 @@ src_umath = [
758758
src_file.process('src/umath/loops_umath_fp.dispatch.c.src'),
759759
src_file.process('src/umath/loops_unary.dispatch.c.src'),
760760
src_file.process('src/umath/loops_unary_fp.dispatch.c.src'),
761+
src_file.process('src/umath/loops_unary_fp_le.dispatch.c.src'),
761762
src_file.process('src/umath/matmul.c.src'),
762763
src_file.process('src/umath/matmul.h.src'),
763764
src_file.process('src/umath/simd.inc.src'),

numpy/core/setup.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1007,6 +1007,7 @@ def generate_umath_doc_header(ext, build_dir):
10071007
join('src', 'umath', 'loops.c.src'),
10081008
join('src', 'umath', 'loops_unary.dispatch.c.src'),
10091009
join('src', 'umath', 'loops_unary_fp.dispatch.c.src'),
1010+
join('src', 'umath', 'loops_unary_fp_le.dispatch.c.src'),
10101011
join('src', 'umath', 'loops_arithm_fp.dispatch.c.src'),
10111012
join('src', 'umath', 'loops_arithmetic.dispatch.c.src'),
10121013
join('src', 'umath', 'loops_logical.dispatch.c.src'),

numpy/core/src/umath/loops.c.src

Lines changed: 8 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -1306,6 +1306,8 @@ TIMEDELTA_mm_qm_divmod(char **args, npy_intp const *dimensions, npy_intp const *
13061306
* #TYPE = FLOAT, DOUBLE, LONGDOUBLE#
13071307
* #c = f, , l#
13081308
* #C = F, , L#
1309+
* #fd = 1, 1, 0#
1310+
* #VCHK = 1, 1, 0#
13091311
*/
13101312
/**begin repeat1
13111313
* #kind = logical_and, logical_or#
@@ -1342,32 +1344,22 @@ NPY_NO_EXPORT void
13421344
}
13431345
}
13441346

1347+
#if !@fd@
13451348
/**begin repeat1
13461349
* #kind = isnan, isinf, isfinite, signbit#
13471350
* #func = npy_isnan, npy_isinf, npy_isfinite, npy_signbit#
13481351
**/
1349-
1350-
/**begin repeat2
1351-
* #ISA = , _avx512_skx#
1352-
* #isa = simd, avx512_skx#
1353-
* #CHK = 1, defined(HAVE_ATTRIBUTE_TARGET_AVX512_SKX)#
1354-
**/
1355-
1356-
#if @CHK@
13571352
NPY_NO_EXPORT void
1358-
@TYPE@_@kind@@ISA@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
1353+
@TYPE@_@kind@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
13591354
{
1360-
if (!run_@kind@_@isa@_@TYPE@(args, dimensions, steps)) {
1361-
UNARY_LOOP {
1362-
const @type@ in1 = *(@type@ *)ip1;
1363-
*((npy_bool *)op1) = @func@(in1) != 0;
1364-
}
1355+
UNARY_LOOP {
1356+
const @type@ in1 = *(@type@ *)ip1;
1357+
*((npy_bool *)op1) = @func@(in1) != 0;
13651358
}
13661359
npy_clear_floatstatus_barrier((char*)dimensions);
13671360
}
1368-
#endif
1369-
/**end repeat2**/
13701361
/**end repeat1**/
1362+
#endif
13711363

13721364
NPY_NO_EXPORT void
13731365
@TYPE@_spacing(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))

numpy/core/src/umath/loops.h.src

Lines changed: 19 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -248,6 +248,20 @@ NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void @TYPE@_@kind@,
248248
/**end repeat1**/
249249
/**end repeat**/
250250

251+
#ifndef NPY_DISABLE_OPTIMIZATION
252+
#include "loops_unary_fp_le.dispatch.h"
253+
#endif
254+
/**begin repeat
255+
* #TYPE = FLOAT, DOUBLE#
256+
*/
257+
/**begin repeat1
258+
* #kind = isnan, isinf, isfinite, signbit#
259+
*/
260+
NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void @TYPE@_@kind@,
261+
(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data)))
262+
/**end repeat1**/
263+
/**end repeat**/
264+
251265
#ifndef NPY_DISABLE_OPTIMIZATION
252266
#include "loops_unary.dispatch.h"
253267
#endif
@@ -400,6 +414,7 @@ NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void @TYPE@_@kind@, (
400414
* #c = f, f, , l#
401415
* #C = F, F, , L#
402416
* #half = 1, 0, 0, 0#
417+
* #fd = 0, 1, 1, 0#
403418
*/
404419

405420
/**begin repeat1
@@ -428,13 +443,13 @@ NPY_NO_EXPORT void
428443
/**begin repeat1
429444
* #kind = isnan, isinf, isfinite, signbit, copysign, nextafter, spacing#
430445
* #func = npy_isnan, npy_isinf, npy_isfinite, npy_signbit, npy_copysign, nextafter, spacing#
446+
* #dispatched = 1, 1, 1, 1, 0, 0, 0#
431447
**/
432448

433-
/**begin repeat2
434-
* #ISA = , _avx512_skx#
435-
**/
449+
#if !@fd@ || !@dispatched@
436450
NPY_NO_EXPORT void
437-
@TYPE@_@kind@@ISA@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func));
451+
@TYPE@_@kind@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func));
452+
#endif
438453
/**end repeat2**/
439454
/**end repeat1**/
440455

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy