Skip to content

Commit 22fc892

Browse files
committed
Allow Linux perf profiler to see Python calls
1 parent 6ec57e7 commit 22fc892

17 files changed

+350
-2
lines changed

Include/cpython/initconfig.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@ typedef struct PyConfig {
142142
unsigned long hash_seed;
143143
int faulthandler;
144144
int tracemalloc;
145+
int perf_profiling;
145146
int import_time;
146147
int code_debug_ranges;
147148
int show_ref_count;

Include/internal/pycore_ceval.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ extern PyObject* _PyEval_BuiltinsFromGlobals(
6565
PyThreadState *tstate,
6666
PyObject *globals);
6767

68+
extern int _PyPerfTrampoline_Init(int activate);
6869

6970
static inline PyObject*
7071
_PyEval_EvalFrame(PyThreadState *tstate, struct _PyInterpreterFrame *frame, int throwflag)

Makefile.pre.in

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -475,7 +475,9 @@ OBJECT_OBJS= \
475475
Objects/unicodeobject.o \
476476
Objects/unicodectype.o \
477477
Objects/unionobject.o \
478-
Objects/weakrefobject.o
478+
Objects/weakrefobject.o \
479+
Objects/perf_trampoline.o \
480+
@PERF_TRAMPOLINE_OBJ@
479481

480482
DEEPFREEZE_OBJS = Python/deepfreeze/deepfreeze.o
481483

@@ -2318,6 +2320,9 @@ config.status: $(srcdir)/configure
23182320

23192321
.PRECIOUS: config.status $(BUILDPYTHON) Makefile Makefile.pre
23202322

2323+
Objects/asm_trampoline.o: $(srcdir)/Objects/asm_trampoline.sx
2324+
$(CC) -c $(PY_CORE_CFLAGS) -o $@ $<
2325+
23212326
# Some make's put the object file in the current directory
23222327
.c.o:
23232328
$(CC) -c $(PY_CORE_CFLAGS) -o $@ $<
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Add a new ``-X perf`` Python command line option as well as the
2+
:func:`sys._activate_perf_trampoline` and
3+
:func:`sys._deactivate_perf_trampoline` functions in the :mod:`sys` module,
4+
which enable or disable a mode in which the Linux ``perf``
5+
profiler can detect Python calls. Patch by Pablo Galindo.

Objects/asm_trampoline.sx

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
/*
 * Template for the perf trampoline.  The C side copies the bytes between
 * _Py_trampoline_func_start and _Py_trampoline_func_end into executable
 * memory, one copy per code object.  The code is equivalent to:
 *
 *     PyObject *
 *     trampoline(py_evaluator evaluator, PyThreadState *ts,
 *                _PyInterpreterFrame *frame, int throwflag)
 *     {
 *         return evaluator(ts, frame, throwflag);
 *     }
 *
 * It must stay position independent because it is relocated with memcpy.
 */
    .text
    .globl	_Py_trampoline_func_start
_Py_trampoline_func_start:
#ifdef __x86_64__
    /* SysV AMD64: rdi=evaluator, rsi=ts, rdx=frame, ecx=throwflag */
    push   %rbp
    mov    %rsp,%rbp
    mov    %rdi,%rax
    mov    %rsi,%rdi
    mov    %rdx,%rsi
    mov    %ecx,%edx
    call   *%rax
    pop    %rbp
    ret
#endif // __x86_64__
#ifdef __aarch64__
    /* AAPCS64: x0=evaluator, x1=ts, x2=frame, w3=throwflag */
    stp    x29, x30, [sp, -16]!
    mov    x29, sp
    mov    x4, x0
    mov    x0, x1
    mov    x1, x2
    mov    w2, w3
    blr    x4
    ldp    x29, x30, [sp], 16
    ret
#endif // __aarch64__
    .globl	_Py_trampoline_func_end
_Py_trampoline_func_end:
    .section        .note.GNU-stack,"",@progbits
21+

Objects/perf_trampoline.c

Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
#include "Python.h"
2+
#include "pycore_ceval.h"
3+
#include "pycore_frame.h"
4+
#include "pycore_interp.h"
5+
6+
#ifdef HAVE_PERF_TRAMPOLINE
7+
8+
#include <stdio.h>
9+
#include <stdlib.h>
10+
#include <sys/mman.h>
11+
#include <sys/types.h>
12+
#include <unistd.h>
13+
14+
typedef PyObject *(*py_evaluator)(PyThreadState *, _PyInterpreterFrame *,
15+
int throwflag);
16+
typedef PyObject *(*py_trampoline)(py_evaluator, PyThreadState *,
17+
_PyInterpreterFrame *, int throwflag);
18+
extern void *_Py_trampoline_func_start;
19+
extern void *_Py_trampoline_func_end;
20+
21+
typedef struct {
22+
char *start_addr;
23+
char *current_addr;
24+
size_t size;
25+
size_t size_left;
26+
size_t code_size;
27+
} code_arena_t;
28+
29+
static Py_ssize_t extra_code_index = -1;
30+
static code_arena_t code_arena;
31+
32+
static int
33+
new_code_arena()
34+
{
35+
size_t page_size = sysconf(_SC_PAGESIZE);
36+
char *memory = mmap(NULL, // address
37+
page_size, PROT_READ | PROT_WRITE | PROT_EXEC,
38+
MAP_PRIVATE | MAP_ANONYMOUS,
39+
-1, // fd (not used here)
40+
0); // offset (not used here)
41+
if (!memory) {
42+
Py_FatalError("Failed to allocate new code arena");
43+
return -1;
44+
}
45+
void *start = &_Py_trampoline_func_start;
46+
void *end = &_Py_trampoline_func_end;
47+
size_t code_size = end - start;
48+
49+
long n_copies = page_size / code_size;
50+
for (int i = 0; i < n_copies; i++) {
51+
memcpy(memory + i * code_size, start, code_size * sizeof(char));
52+
}
53+
54+
mprotect(memory, page_size, PROT_READ | PROT_EXEC);
55+
56+
code_arena.start_addr = memory;
57+
code_arena.current_addr = memory;
58+
code_arena.size = page_size;
59+
code_arena.size_left = page_size;
60+
code_arena.code_size = code_size;
61+
return 0;
62+
}
63+
64+
static inline py_trampoline
65+
code_arena_new_code(code_arena_t *code_arena)
66+
{
67+
py_trampoline trampoline = (py_trampoline)code_arena->current_addr;
68+
code_arena->size_left -= code_arena->code_size;
69+
code_arena->current_addr += code_arena->code_size;
70+
return trampoline;
71+
}
72+
73+
static inline py_trampoline
74+
compile_trampoline(void)
75+
{
76+
if (code_arena.size_left <= code_arena.code_size) {
77+
if (new_code_arena() < 0) {
78+
return NULL;
79+
}
80+
}
81+
82+
assert(code_arena.size_left <= code_arena.size);
83+
return code_arena_new_code(&code_arena);
84+
}
85+
86+
/*
 * Open /tmp/perf-<pid>.map in append mode and return the stream.
 * Failure to open the file is reported as a fatal error.
 */
static inline FILE *
perf_map_open(pid_t pid)
{
    char map_path[100];
    snprintf(map_path, sizeof(map_path), "/tmp/perf-%d.map", pid);

    FILE *map_file = fopen(map_path, "a");
    if (map_file != NULL) {
        return map_file;
    }

    _Py_FatalErrorFormat(__func__, "Couldn't open %s: errno(%d)", map_path, errno);
    return NULL;
}
98+
99+
/*
 * Close a stream returned by perf_map_open().  A NULL stream is accepted
 * and treated as success; otherwise the result of fclose() is returned.
 */
static inline int
perf_map_close(FILE *fp)
{
    return (fp != NULL) ? fclose(fp) : 0;
}
107+
108+
/*
 * Append one symbol line to a perf map file:
 *
 *     <hex start address> <hex size> py::<entry>:<file>
 *
 * method_file: open map stream; nothing is written when it is NULL.
 * code_addr:   address of the trampoline the symbol describes.
 * code_size:   size of the trampoline in bytes.
 * entry, file: NUL-terminated names.  A NULL name is written as
 *              "<unknown>" instead of being passed to "%s", which would be
 *              undefined behaviour.
 */
static void
perf_map_write_entry(FILE *method_file, const void *code_addr,
                     unsigned int code_size, const char *entry,
                     const char *file)
{
    if (method_file == NULL) {
        return;
    }
    const char *entry_name = (entry != NULL) ? entry : "<unknown>";
    const char *file_name = (file != NULL) ? file : "<unknown>";

    fprintf(method_file, "%lx %x py::%s:%s\n", (unsigned long)code_addr,
            code_size, entry_name, file_name);
}
116+
117+
static PyObject *
118+
py_trampoline_evaluator(PyThreadState *ts, _PyInterpreterFrame *frame,
119+
int throw)
120+
{
121+
PyCodeObject *co = frame->f_code;
122+
py_trampoline f = NULL;
123+
_PyCode_GetExtra((PyObject *)co, extra_code_index, (void **)&f);
124+
if (f == NULL) {
125+
if (extra_code_index == -1) {
126+
extra_code_index = _PyEval_RequestCodeExtraIndex(NULL);
127+
}
128+
py_trampoline new_trampoline = compile_trampoline();
129+
if (new_trampoline == NULL) {
130+
return NULL;
131+
}
132+
FILE *pfile = perf_map_open(getpid());
133+
if (pfile == NULL) {
134+
return NULL;
135+
}
136+
perf_map_write_entry(pfile, new_trampoline, code_arena.code_size,
137+
PyUnicode_AsUTF8(co->co_qualname),
138+
PyUnicode_AsUTF8(co->co_filename));
139+
perf_map_close(pfile);
140+
_PyCode_SetExtra((PyObject *)co, extra_code_index,
141+
(void *)new_trampoline);
142+
f = new_trampoline;
143+
}
144+
assert(f != NULL);
145+
return f(_PyEval_EvalFrameDefault, ts, frame, throw);
146+
}
147+
#endif
148+
149+
int
150+
_PyPerfTrampoline_Init(int activate)
151+
{
152+
PyThreadState *tstate = _PyThreadState_GET();
153+
if (!activate) {
154+
tstate->interp->eval_frame = NULL;
155+
}
156+
else {
157+
#ifdef HAVE_PERF_TRAMPOLINE
158+
tstate->interp->eval_frame = py_trampoline_evaluator;
159+
if (new_code_arena() < 0) {
160+
return -1;
161+
}
162+
#endif
163+
}
164+
return 0;
165+
}

PCbuild/_freeze_module.vcxproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,7 @@
129129
<ClCompile Include="..\Objects\cellobject.c" />
130130
<ClCompile Include="..\Objects\classobject.c" />
131131
<ClCompile Include="..\Objects\codeobject.c" />
132+
<ClCompile Include="..\Objects\perf_trampoline.c" />
132133
<ClCompile Include="..\Objects\complexobject.c" />
133134
<ClCompile Include="..\Objects\descrobject.c" />
134135
<ClCompile Include="..\Objects\dictobject.c" />

PCbuild/_freeze_module.vcxproj.filters

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,9 @@
8585
<ClCompile Include="..\Objects\codeobject.c">
8686
<Filter>Source Files</Filter>
8787
</ClCompile>
88+
<ClCompile Include="..\Objects\perf_trampoline.c">
89+
<Filter>Source Files</Filter>
90+
</ClCompile>
8891
<ClCompile Include="..\Python\compile.c">
8992
<Filter>Source Files</Filter>
9093
</ClCompile>

PCbuild/pythoncore.vcxproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -430,6 +430,7 @@
430430
<ClCompile Include="..\Objects\cellobject.c" />
431431
<ClCompile Include="..\Objects\classobject.c" />
432432
<ClCompile Include="..\Objects\codeobject.c" />
433+
<ClCompile Include="..\Objects\perf_trampoline.c" />
433434
<ClCompile Include="..\Objects\complexobject.c" />
434435
<ClCompile Include="..\Objects\descrobject.c" />
435436
<ClCompile Include="..\Objects\dictobject.c" />

PCbuild/pythoncore.vcxproj.filters

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -926,6 +926,9 @@
926926
<ClCompile Include="..\Objects\codeobject.c">
927927
<Filter>Objects</Filter>
928928
</ClCompile>
929+
<ClCompile Include="..\Objects\perf_trampoline.c">
930+
<Filter>Objects</Filter>
931+
</ClCompile>
929932
<ClCompile Include="..\Objects\complexobject.c">
930933
<Filter>Objects</Filter>
931934
</ClCompile>

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy