diff --git a/Doc/howto/perf_profiling.rst b/Doc/howto/perf_profiling.rst index 2e1bb48af8c88e..ed8de888b3bc21 100644 --- a/Doc/howto/perf_profiling.rst +++ b/Doc/howto/perf_profiling.rst @@ -155,6 +155,9 @@ active since the start of the Python interpreter, you can use the `-Xperf` optio $ python -Xperf my_script.py +You can also set the :envvar:`PYTHONPERFSUPPORT` environment variable to a nonzero value to activate perf +profiling mode globally. + There is also support for dynamically activating and deactivating the perf profiling mode by using the APIs in the :mod:`sys` module: diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst index 5ecc882d818fce..fa2b07e468b3b5 100644 --- a/Doc/using/cmdline.rst +++ b/Doc/using/cmdline.rst @@ -582,6 +582,8 @@ Miscellaneous options .. versionadded:: 3.11 The ``-X frozen_modules`` option. + .. versionadded:: 3.12 + The ``-X perf`` option. Options you shouldn't use diff --git a/Lib/test/test_perf_profiler.py b/Lib/test/test_perf_profiler.py index c2aad85b652e35..f587995b008f68 100644 --- a/Lib/test/test_perf_profiler.py +++ b/Lib/test/test_perf_profiler.py @@ -58,7 +58,7 @@ def baz(): script = make_script(script_dir, "perftest", code) with subprocess.Popen( [sys.executable, "-Xperf", script], - universal_newlines=True, + text=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE, ) as process: diff --git a/Objects/perf_trampoline.c b/Objects/perf_trampoline.c index 02206b2786c87f..2cbe3741f26fbc 100644 --- a/Objects/perf_trampoline.c +++ b/Objects/perf_trampoline.c @@ -284,12 +284,23 @@ new_code_arena(void) void *start = &_Py_trampoline_func_start; void *end = &_Py_trampoline_func_end; size_t code_size = end - start; + // TODO: Check the effect of alignment of the code chunks. Initial investigation + // showed that this has no effect on performance in x86-64 or aarch64 and the current + // version has the advantage that the unwinder in GDB can unwind across JIT-ed code. 
+ // + // We should check the values in the future and see if there is a + // measurable performance improvement by rounding trampolines up to 32-bit + // or 64-bit alignment. size_t n_copies = mem_size / code_size; for (size_t i = 0; i < n_copies; i++) { memcpy(memory + i * code_size, start, code_size * sizeof(char)); } // Some systems may prevent us from creating executable code on the fly. + // TODO: Call icache invalidation intrinsics if available: + // __builtin___clear_cache/__clear_cache (depending if clang/gcc). This is + // technically not necessary but we could be missing something so better be + // safe. int res = mprotect(memory, mem_size, PROT_READ | PROT_EXEC); if (res == -1) { PyErr_SetFromErrno(PyExc_OSError);
Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.
Alternative Proxies: