From 5860dc798d77d3d46356d9ee835f1e4b7f93ca03 Mon Sep 17 00:00:00 2001 From: "Josh {*()} Rosenberg" <26495692+MojoVampire@users.noreply.github.com> Date: Wed, 22 May 2024 17:45:34 +0000 Subject: [PATCH] gh-119247: Add macros to use PySequence_Fast safely in free-threaded build (GH-119315) Add `Py_BEGIN_CRITICAL_SECTION_SEQUENCE_FAST` and `Py_END_CRITICAL_SECTION_SEQUENCE_FAST` macros and update `str.join` to use them. Also add a regression test that would crash reliably without this patch. (cherry picked from commit baf347d91643a83483bae110092750d39471e0c2) Co-authored-by: Josh {*()} Rosenberg <26495692+MojoVampire@users.noreply.github.com> --- Include/internal/pycore_critical_section.h | 22 ++++++ Lib/test/test_free_threading/test_str.py | 75 +++++++++++++++++++ ...-05-21-11-35-11.gh-issue-119247.U6n6mh.rst | 4 + Objects/unicodeobject.c | 8 +- 4 files changed, 106 insertions(+), 3 deletions(-) create mode 100644 Lib/test/test_free_threading/test_str.py create mode 100644 Misc/NEWS.d/next/C API/2024-05-21-11-35-11.gh-issue-119247.U6n6mh.rst diff --git a/Include/internal/pycore_critical_section.h b/Include/internal/pycore_critical_section.h index 573d09a09683ef..7bebcdb67c7169 100644 --- a/Include/internal/pycore_critical_section.h +++ b/Include/internal/pycore_critical_section.h @@ -108,6 +108,26 @@ extern "C" { _PyCriticalSection2_End(&_cs2); \ } +// Specialized version of critical section locking to safely use +// PySequence_Fast APIs without the GIL. For performance, the argument *to* +// PySequence_Fast() is provided to the macro, not the *result* of +// PySequence_Fast(), which would require an extra test to determine if the +// lock must be acquired. +# define Py_BEGIN_CRITICAL_SECTION_SEQUENCE_FAST(original) \ + { \ + PyObject *_orig_seq = _PyObject_CAST(original); \ + const bool _should_lock_cs = PyList_CheckExact(_orig_seq); \ + _PyCriticalSection _cs; \ + if (_should_lock_cs) { \ + _PyCriticalSection_Begin(&_cs, &_orig_seq->ob_mutex); \ + } + +# define Py_END_CRITICAL_SECTION_SEQUENCE_FAST() \ + if (_should_lock_cs) { \ + _PyCriticalSection_End(&_cs); \ + } \ + } + // Asserts that the mutex is locked. The mutex must be held by the // top-most critical section otherwise there's the possibility // that the mutex would be swalled out in some code paths. @@ -137,6 +157,8 @@ extern "C" { # define Py_END_CRITICAL_SECTION() # define Py_BEGIN_CRITICAL_SECTION2(a, b) # define Py_END_CRITICAL_SECTION2() +# define Py_BEGIN_CRITICAL_SECTION_SEQUENCE_FAST(original) +# define Py_END_CRITICAL_SECTION_SEQUENCE_FAST() # define _Py_CRITICAL_SECTION_ASSERT_MUTEX_LOCKED(mutex) # define _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(op) #endif /* !Py_GIL_DISABLED */ diff --git a/Lib/test/test_free_threading/test_str.py b/Lib/test/test_free_threading/test_str.py new file mode 100644 index 00000000000000..f5e93b7de0aa9b --- /dev/null +++ b/Lib/test/test_free_threading/test_str.py @@ -0,0 +1,75 @@ +import sys +import unittest + +from itertools import cycle +from threading import Event, Thread +from unittest import TestCase + +from test.support import threading_helper + +@threading_helper.requires_working_threading() +class TestStr(TestCase): + def test_racing_join_extend(self): + '''Test joining a string being extended by another thread''' + l = [] + ITERS = 100 + READERS = 10 + done_event = Event() + def writer_func(): + for i in range(ITERS): + l.extend(map(str, range(i))) + l.clear() + done_event.set() + def reader_func(): + while not done_event.is_set(): + ''.join(l) + writer = Thread(target=writer_func) + readers = [] + for x in range(READERS): + reader = Thread(target=reader_func) + readers.append(reader) + reader.start() + + writer.start() + writer.join() + for reader in readers: + reader.join() + + def test_racing_join_replace(self): + ''' + Test joining a string of characters being replaced with ephemeral + strings by another thread. + ''' + l = [*'abcdefg'] + MAX_ORDINAL = 1_000 + READERS = 10 + done_event = Event() + + def writer_func(): + for i, c in zip(cycle(range(len(l))), + map(chr, range(128, MAX_ORDINAL))): + l[i] = c + done_event.set() + + def reader_func(): + while not done_event.is_set(): + ''.join(l) + ''.join(l) + ''.join(l) + ''.join(l) + + writer = Thread(target=writer_func) + readers = [] + for x in range(READERS): + reader = Thread(target=reader_func) + readers.append(reader) + reader.start() + + writer.start() + writer.join() + for reader in readers: + reader.join() + + +if __name__ == "__main__": + unittest.main() diff --git a/Misc/NEWS.d/next/C API/2024-05-21-11-35-11.gh-issue-119247.U6n6mh.rst b/Misc/NEWS.d/next/C API/2024-05-21-11-35-11.gh-issue-119247.U6n6mh.rst new file mode 100644 index 00000000000000..3b2cdc8cf2dc5c --- /dev/null +++ b/Misc/NEWS.d/next/C API/2024-05-21-11-35-11.gh-issue-119247.U6n6mh.rst @@ -0,0 +1,4 @@ +Added ``Py_BEGIN_CRITICAL_SECTION_SEQUENCE_FAST`` and +``Py_END_CRITICAL_SECTION_SEQUENCE_FAST`` macros to make it possible to use +PySequence_Fast APIs safely when free-threaded, and update str.join to work +without the GIL using them. diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 057b417074ebea..480b6713905c70 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -44,6 +44,7 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. #include "pycore_bytesobject.h" // _PyBytes_Repeat() #include "pycore_ceval.h" // _PyEval_GetBuiltin() #include "pycore_codecs.h" // _PyCodec_Lookup() +#include "pycore_critical_section.h" // Py_*_CRITICAL_SECTION_SEQUENCE_FAST #include "pycore_format.h" // F_LJUST #include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_interp.h" // PyInterpreterState.fs_codec @@ -9559,13 +9560,14 @@ PyUnicode_Join(PyObject *separator, PyObject *seq) return NULL; } - /* NOTE: the following code can't call back into Python code, - * so we are sure that fseq won't be mutated. - */ + Py_BEGIN_CRITICAL_SECTION_SEQUENCE_FAST(seq); items = PySequence_Fast_ITEMS(fseq); seqlen = PySequence_Fast_GET_SIZE(fseq); res = _PyUnicode_JoinArray(separator, items, seqlen); + + Py_END_CRITICAL_SECTION_SEQUENCE_FAST(); + Py_DECREF(fseq); return res; }
Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.
Alternative Proxies: