From 087f13460a6b470314318b14ef966194c39c4b21 Mon Sep 17 00:00:00 2001
From: Thomas Wouters
Date: Mon, 14 Apr 2025 14:20:54 +0200
Subject: [PATCH 1/2] Only disable SLP autovectorization of
 `_PyEval_EvalFrameDefault` on newer GCCs, as the optimization bug seems to
 exist only on GCC 12 and later, and before GCC 9 disabling the optimization
 has a dramatic performance impact.

---
 Python/ceval.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/Python/ceval.c b/Python/ceval.c
index 47d068edac2743..c484bb8dfbbfa5 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -948,11 +948,14 @@ _PyObjectArray_Free(PyObject **array, PyObject **scratch)
 #include "generated_cases.c.h"
 #endif
 
-#if (defined(__GNUC__) && !defined(__clang__)) && defined(__x86_64__)
+#if (defined(__GNUC__) && __GNUC__ >= 10 && !defined(__clang__)) && defined(__x86_64__)
 /*
- * gh-129987: The SLP autovectorizer can cause poor code generation for opcode
- * dispatch, negating any benefit we get from vectorization elsewhere in the
- * interpreter loop.
+ * gh-129987: The SLP autovectorizer can cause poor code generation for
+ * opcode dispatch in some GCC versions (observed in GCCs 12 through 15),
+ * negating any benefit we get from vectorization elsewhere in the
+ * interpreter loop. Disabling it significantly affected older GCC versions
+ * (prior to GCC 9, 40% performance drop), so we have to selectively disable
+ * it.
  */
 #define DONT_SLP_VECTORIZE __attribute__((optimize ("no-tree-slp-vectorize")))
 #else

From fc53ffe466974b1f5e2fdf96552d69cf6b1174a2 Mon Sep 17 00:00:00 2001
From: Thomas Wouters
Date: Tue, 15 Apr 2025 11:14:24 +0200
Subject: [PATCH 2/2] Add comment linking to the likely culprit in GCC.

---
 Python/ceval.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Python/ceval.c b/Python/ceval.c
index c484bb8dfbbfa5..8b6f8bf2e15f3a 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -951,7 +951,8 @@ _PyObjectArray_Free(PyObject **array, PyObject **scratch)
 #if (defined(__GNUC__) && __GNUC__ >= 10 && !defined(__clang__)) && defined(__x86_64__)
 /*
  * gh-129987: The SLP autovectorizer can cause poor code generation for
- * opcode dispatch in some GCC versions (observed in GCCs 12 through 15),
+ * opcode dispatch in some GCC versions (observed in GCCs 12 through 15,
+ * probably caused by https://gcc.gnu.org/bugzilla/show_bug.cgi?id=115777),
  * negating any benefit we get from vectorization elsewhere in the
  * interpreter loop. Disabling it significantly affected older GCC versions
  * (prior to GCC 9, 40% performance drop), so we have to selectively disable

pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy