From 54ff198409c04b72570f708685631afb65c6dd38 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" <69878+youknowone@users.noreply.github.com> Date: Thu, 10 Jul 2025 08:57:08 +0900 Subject: [PATCH 001/176] Upgrade Lib/types.py from Python 3.13.5 (#5928) --- Lib/types.py | 52 +++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 43 insertions(+), 9 deletions(-) diff --git a/Lib/types.py b/Lib/types.py index 4dab6ddce0..b036a85068 100644 --- a/Lib/types.py +++ b/Lib/types.py @@ -1,6 +1,7 @@ """ Define names for built-in types that aren't directly accessible as a builtin. """ + import sys # Iterators in Python aren't a matter of type but of protocol. A large @@ -52,17 +53,14 @@ def _m(self): pass try: raise TypeError -except TypeError: - tb = sys.exc_info()[2] - TracebackType = type(tb) - FrameType = type(tb.tb_frame) - tb = None; del tb +except TypeError as exc: + TracebackType = type(exc.__traceback__) + FrameType = type(exc.__traceback__.tb_frame) -# For Jython, the following two types are identical GetSetDescriptorType = type(FunctionType.__code__) MemberDescriptorType = type(FunctionType.__globals__) -del sys, _f, _g, _C, _c, _ag # Not for export +del sys, _f, _g, _C, _c, _ag, _cell_factory # Not for export # Provide a PEP 3115 compliant mechanism for class creation @@ -82,7 +80,7 @@ def resolve_bases(bases): updated = False shift = 0 for i, base in enumerate(bases): - if isinstance(base, type) and not isinstance(base, GenericAlias): + if isinstance(base, type): continue if not hasattr(base, "__mro_entries__"): continue @@ -146,6 +144,35 @@ def _calculate_meta(meta, bases): "of the metaclasses of all its bases") return winner + +def get_original_bases(cls, /): + """Return the class's "original" bases prior to modification by `__mro_entries__`. + + Examples:: + + from typing import TypeVar, Generic, NamedTuple, TypedDict + + T = TypeVar("T") + class Foo(Generic[T]): ... + class Bar(Foo[int], float): ... + class Baz(list[str]): ... 
+ Eggs = NamedTuple("Eggs", [("a", int), ("b", str)]) + Spam = TypedDict("Spam", {"a": int, "b": str}) + + assert get_original_bases(Bar) == (Foo[int], float) + assert get_original_bases(Baz) == (list[str],) + assert get_original_bases(Eggs) == (NamedTuple,) + assert get_original_bases(Spam) == (TypedDict,) + assert get_original_bases(int) == (object,) + """ + try: + return cls.__dict__.get("__orig_bases__", cls.__bases__) + except AttributeError: + raise TypeError( + f"Expected an instance of type, not {type(cls).__name__!r}" + ) from None + + class DynamicClassAttribute: """Route attribute access on a class to __getattr__. @@ -158,7 +185,7 @@ class DynamicClassAttribute: attributes on the class with the same name. (Enum used this between Python versions 3.4 - 3.9 .) - Subclass from this to use a different method of accessing virtual atributes + Subclass from this to use a different method of accessing virtual attributes and still be treated properly by the inspect module. (Enum uses this since Python 3.10 .) 
@@ -305,4 +332,11 @@ def wrapped(*args, **kwargs): NoneType = type(None) NotImplementedType = type(NotImplemented) +def __getattr__(name): + if name == 'CapsuleType': + import _socket + return type(_socket.CAPI) + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") + __all__ = [n for n in globals() if n[:1] != '_'] +__all__ += ['CapsuleType'] From f608df4a23050434de60f407891fc452f7e39ebd Mon Sep 17 00:00:00 2001 From: yt2b <76801443+yt2b@users.noreply.github.com> Date: Thu, 10 Jul 2025 09:10:52 +0900 Subject: [PATCH 002/176] Formatting with width and separator doesn't work correctly (#5927) * Fix add_magnitude_separators * Add extra tests --- common/src/format.rs | 8 ++++++-- extra_tests/snippets/builtin_format.py | 6 ++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/common/src/format.rs b/common/src/format.rs index 92ab99a571..819061f86b 100644 --- a/common/src/format.rs +++ b/common/src/format.rs @@ -436,8 +436,12 @@ impl FormatSpec { let sep = char::from(fg); let inter = self.get_separator_interval().try_into().unwrap(); let magnitude_len = magnitude_str.len(); - let width = self.width.unwrap_or(magnitude_len) as i32 - prefix.len() as i32; - let disp_digit_cnt = cmp::max(width, magnitude_len as i32); + let disp_digit_cnt = if self.fill == Some('0'.into()) { + let width = self.width.unwrap_or(magnitude_len) as i32 - prefix.len() as i32; + cmp::max(width, magnitude_len as i32) + } else { + magnitude_len as i32 + }; Self::add_magnitude_separators_for_char(magnitude_str, inter, sep, disp_digit_cnt) } None => magnitude_str, diff --git a/extra_tests/snippets/builtin_format.py b/extra_tests/snippets/builtin_format.py index 457adaa136..ac7afb769a 100644 --- a/extra_tests/snippets/builtin_format.py +++ b/extra_tests/snippets/builtin_format.py @@ -81,6 +81,9 @@ def test_zero_padding(): assert f"{123.456:+011,}" == "+00,123.456" assert f"{1234:.3g}" == "1.23e+03" assert f"{1234567:.6G}" == "1.23457E+06" +assert f"{1234:10}" == " 1234" 
+assert f"{1234:10,}" == " 1,234" +assert f"{1234:010,}" == "00,001,234" assert f"{'🐍':4}" == "🐍 " assert_raises( ValueError, "{:,o}".format, 1, _msg="ValueError: Cannot specify ',' with 'o'." @@ -165,6 +168,9 @@ def test_zero_padding(): assert f"{3.1415:#.2}" == "3.1" assert f"{3.1415:#.3}" == "3.14" assert f"{3.1415:#.4}" == "3.142" +assert f"{1234.5:10}" == " 1234.5" +assert f"{1234.5:10,}" == " 1,234.5" +assert f"{1234.5:010,}" == "0,001,234.5" assert f"{12.34 + 5.6j}" == "(12.34+5.6j)" assert f"{12.34 - 5.6j: }" == "( 12.34-5.6j)" assert f"{12.34 + 5.6j:20}" == " (12.34+5.6j)" From 18d7c1baf12e5c5480d92dd8e1e394ccaac42f80 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" <69878+youknowone@users.noreply.github.com> Date: Thu, 10 Jul 2025 10:27:03 +0900 Subject: [PATCH 003/176] codeobj.qualname (#5929) --- Lib/test/test_code.py | 2 -- compiler/codegen/src/compile.rs | 26 ++++++++++++++++++++++++-- compiler/codegen/src/ir.rs | 5 ++++- compiler/core/src/bytecode.rs | 4 ++++ compiler/core/src/marshal.rs | 5 +++++ vm/src/builtins/code.rs | 5 +++++ 6 files changed, 42 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_code.py b/Lib/test/test_code.py index 1aceff4efc..6b0dc09e28 100644 --- a/Lib/test/test_code.py +++ b/Lib/test/test_code.py @@ -249,8 +249,6 @@ def func(): pass co.co_freevars, co.co_cellvars) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_qualname(self): self.assertEqual( CodeTest.test_qualname.__code__.co_qualname, diff --git a/compiler/codegen/src/compile.rs b/compiler/codegen/src/compile.rs index 4180b79712..ab9b469ad8 100644 --- a/compiler/codegen/src/compile.rs +++ b/compiler/codegen/src/compile.rs @@ -310,8 +310,8 @@ impl<'src> Compiler<'src> { kwonlyarg_count: 0, source_path: source_code.path.to_owned(), first_line_number: OneIndexed::MIN, - obj_name: code_name, - + obj_name: code_name.clone(), + qualname: Some(code_name), blocks: vec![ir::Block::default()], current_block: ir::BlockIdx(0), constants: IndexSet::default(), @@ -402,6 
+402,13 @@ impl Compiler<'_> { .map(|(var, _)| var.clone()) .collect(); + // Calculate qualname based on the current qualified path + let qualname = if self.qualified_path.is_empty() { + Some(obj_name.clone()) + } else { + Some(self.qualified_path.join(".")) + }; + let info = ir::CodeInfo { flags, posonlyarg_count, @@ -410,6 +417,7 @@ impl Compiler<'_> { source_path, first_line_number, obj_name, + qualname, blocks: vec![ir::Block::default()], current_block: ir::BlockIdx(0), @@ -1496,6 +1504,10 @@ impl Compiler<'_> { self.push_qualified_path(name); let qualified_name = self.qualified_path.join("."); + + // Update the qualname in the current code info + self.code_stack.last_mut().unwrap().qualname = Some(qualified_name.clone()); + self.push_qualified_path(""); let (doc_str, body) = split_doc(body, &self.opts); @@ -1720,6 +1732,9 @@ impl Compiler<'_> { self.push_output(bytecode::CodeFlags::empty(), 0, 0, 0, name.to_owned()); + // Update the qualname in the current code info + self.code_stack.last_mut().unwrap().qualname = Some(qualified_name.clone()); + let (doc_str, body) = split_doc(body, &self.opts); let dunder_name = self.name("__name__"); @@ -3495,6 +3510,9 @@ impl Compiler<'_> { let mut func_flags = self .enter_function(&name, parameters.as_deref().unwrap_or(&Default::default()))?; + // Lambda qualname should be + self.code_stack.last_mut().unwrap().qualname = Some(name.clone()); + self.ctx = CompileContext { loop_data: Option::None, in_class: prev_ctx.in_class, @@ -3956,6 +3974,10 @@ impl Compiler<'_> { // Create magnificent function : self.push_output(flags, 1, 1, 0, name.to_owned()); + + // Set qualname for comprehension + self.code_stack.last_mut().unwrap().qualname = Some(name.to_owned()); + let arg0 = self.varname(".0")?; let return_none = init_collection.is_none(); diff --git a/compiler/codegen/src/ir.rs b/compiler/codegen/src/ir.rs index 7acd9d7f6a..5e115d9dee 100644 --- a/compiler/codegen/src/ir.rs +++ b/compiler/codegen/src/ir.rs @@ -73,6 +73,7 @@ pub 
struct CodeInfo { pub source_path: String, pub first_line_number: OneIndexed, pub obj_name: String, // Name of the object that created this code object + pub qualname: Option, // Qualified name of the object pub blocks: Vec, pub current_block: BlockIdx, @@ -99,6 +100,7 @@ impl CodeInfo { source_path, first_line_number, obj_name, + qualname, mut blocks, current_block: _, @@ -162,7 +164,8 @@ impl CodeInfo { kwonlyarg_count, source_path, first_line_number: Some(first_line_number), - obj_name, + obj_name: obj_name.clone(), + qualname: qualname.unwrap_or(obj_name), max_stackdepth, instructions: instructions.into_boxed_slice(), diff --git a/compiler/core/src/bytecode.rs b/compiler/core/src/bytecode.rs index be55fe3502..3fe9356004 100644 --- a/compiler/core/src/bytecode.rs +++ b/compiler/core/src/bytecode.rs @@ -115,6 +115,8 @@ pub struct CodeObject { pub max_stackdepth: u32, pub obj_name: C::Name, // Name of the object that created this code object + pub qualname: C::Name, + // Qualified name of the object (like CPython's co_qualname) pub cell2arg: Option>, pub constants: Box<[C]>, pub names: Box<[C::Name]>, @@ -1140,6 +1142,7 @@ impl CodeObject { freevars: map_names(self.freevars), source_path: bag.make_name(self.source_path.as_ref()), obj_name: bag.make_name(self.obj_name.as_ref()), + qualname: bag.make_name(self.qualname.as_ref()), instructions: self.instructions, locations: self.locations, @@ -1169,6 +1172,7 @@ impl CodeObject { freevars: map_names(&self.freevars), source_path: bag.make_name(self.source_path.as_ref()), obj_name: bag.make_name(self.obj_name.as_ref()), + qualname: bag.make_name(self.qualname.as_ref()), instructions: self.instructions.clone(), locations: self.locations.clone(), diff --git a/compiler/core/src/marshal.rs b/compiler/core/src/marshal.rs index 700bb48230..fdbae7ec30 100644 --- a/compiler/core/src/marshal.rs +++ b/compiler/core/src/marshal.rs @@ -210,6 +210,9 @@ pub fn deserialize_code( let len = rdr.read_u32()?; let obj_name = 
bag.make_name(rdr.read_str(len)?); + let len = rdr.read_u32()?; + let qualname = bag.make_name(rdr.read_str(len)?); + let len = rdr.read_u32()?; let cell2arg = (len != 0) .then(|| { @@ -250,6 +253,7 @@ pub fn deserialize_code( first_line_number, max_stackdepth, obj_name, + qualname, cell2arg, constants, names, @@ -609,6 +613,7 @@ pub fn serialize_code(buf: &mut W, code: &CodeObject) buf.write_u32(code.max_stackdepth); write_vec(buf, code.obj_name.as_ref().as_bytes()); + write_vec(buf, code.qualname.as_ref().as_bytes()); let cell2arg = code.cell2arg.as_deref().unwrap_or(&[]); write_len(buf, cell2arg.len()); diff --git a/vm/src/builtins/code.rs b/vm/src/builtins/code.rs index 0058dbf555..37c883043a 100644 --- a/vm/src/builtins/code.rs +++ b/vm/src/builtins/code.rs @@ -298,6 +298,10 @@ impl PyCode { fn co_name(&self) -> PyStrRef { self.code.obj_name.to_owned() } + #[pygetset] + fn co_qualname(&self) -> PyStrRef { + self.code.qualname.to_owned() + } #[pygetset] fn co_names(&self, vm: &VirtualMachine) -> PyTupleRef { @@ -401,6 +405,7 @@ impl PyCode { source_path: source_path.as_object().as_interned_str(vm).unwrap(), first_line_number, obj_name: obj_name.as_object().as_interned_str(vm).unwrap(), + qualname: self.code.qualname, max_stackdepth: self.code.max_stackdepth, instructions: self.code.instructions.clone(), From 8c4c63673e5dabcb031768e5e775e222162cf2f5 Mon Sep 17 00:00:00 2001 From: Jiseok CHOI Date: Thu, 10 Jul 2025 18:11:24 +0900 Subject: [PATCH 004/176] fix(itertools): add re-entrancy guard to tee object (#5931) * fix(itertools): add re-entrancy guard to tee object * apply feedback PyRwLock -> PyMutex & remove AtomicCell lock field --- Lib/test/test_itertools.py | 2 -- vm/src/stdlib/itertools.rs | 17 +++++++++++------ 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/Lib/test/test_itertools.py b/Lib/test/test_itertools.py index 8709948b92..072279ea3a 100644 --- a/Lib/test/test_itertools.py +++ b/Lib/test/test_itertools.py @@ -1761,8 +1761,6 @@ def 
test_tee_del_backward(self): del forward, backward raise - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_tee_reenter(self): class I: first = True diff --git a/vm/src/stdlib/itertools.rs b/vm/src/stdlib/itertools.rs index a018fe382d..63b2d390ca 100644 --- a/vm/src/stdlib/itertools.rs +++ b/vm/src/stdlib/itertools.rs @@ -1184,23 +1184,28 @@ mod decl { #[derive(Debug)] struct PyItertoolsTeeData { iterable: PyIter, - values: PyRwLock>, + values: PyMutex>, } impl PyItertoolsTeeData { fn new(iterable: PyIter, _vm: &VirtualMachine) -> PyResult> { Ok(PyRc::new(Self { iterable, - values: PyRwLock::new(vec![]), + values: PyMutex::new(vec![]), })) } fn get_item(&self, vm: &VirtualMachine, index: usize) -> PyResult { - if self.values.read().len() == index { - let result = raise_if_stop!(self.iterable.next(vm)?); - self.values.write().push(result); + let Some(mut values) = self.values.try_lock() else { + return Err(vm.new_runtime_error("cannot re-enter the tee iterator")); + }; + + if values.len() == index { + let obj = raise_if_stop!(self.iterable.next(vm)?); + values.push(obj); } - Ok(PyIterReturn::Return(self.values.read()[index].clone())) + + Ok(PyIterReturn::Return(values[index].clone())) } } From b2013cddc9a83c7a72847df6d08e1febb7f67927 Mon Sep 17 00:00:00 2001 From: Shahar Naveh <50263213+ShaharNaveh@users.noreply.github.com> Date: Thu, 10 Jul 2025 13:55:38 +0300 Subject: [PATCH 005/176] Add "take" comment command (#5932) --- .github/workflows/comment-commands.yml | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 .github/workflows/comment-commands.yml diff --git a/.github/workflows/comment-commands.yml b/.github/workflows/comment-commands.yml new file mode 100644 index 0000000000..2b2a1caefe --- /dev/null +++ b/.github/workflows/comment-commands.yml @@ -0,0 +1,24 @@ +name: Comment Commands + +on: + issue_comment: + types: created + +jobs: + issue_assign: + if: (!github.event.issue.pull_request) && github.event.comment.body == 
'take' + runs-on: ubuntu-latest + + concurrency: + group: ${{ github.actor }}-issue-assign + + permissions: + issues: write + + steps: + - run: gh issue edit "${{ env.ISSUE_NUMBER }}" --add-assignee "${{ env.USER_LOGIN }}" + env: + ISSUE_NUMBER: ${{ github.event.issue.number }} + USER_LOGIN: ${{ github.event.comment.user.login }} + GH_TOKEN: ${{ github.token }} + GH_REPO: ${{ github.repository }} From ef385a9efa3d976e528015ddb8b47d7b6f589ced Mon Sep 17 00:00:00 2001 From: Shahar Naveh <50263213+ShaharNaveh@users.noreply.github.com> Date: Thu, 10 Jul 2025 16:46:29 +0300 Subject: [PATCH 006/176] Add missing `@` for the "take" comment command (#5933) --- .github/workflows/comment-commands.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/comment-commands.yml b/.github/workflows/comment-commands.yml index 2b2a1caefe..0d209fd676 100644 --- a/.github/workflows/comment-commands.yml +++ b/.github/workflows/comment-commands.yml @@ -16,7 +16,7 @@ jobs: issues: write steps: - - run: gh issue edit "${{ env.ISSUE_NUMBER }}" --add-assignee "${{ env.USER_LOGIN }}" + - run: gh issue edit "${{ env.ISSUE_NUMBER }}" --add-assignee "@${{ env.USER_LOGIN }}" env: ISSUE_NUMBER: ${{ github.event.issue.number }} USER_LOGIN: ${{ github.event.comment.user.login }} From 4c7523080ab6bbd2b7386554ba56562168d5c26b Mon Sep 17 00:00:00 2001 From: Jiseok CHOI Date: Thu, 10 Jul 2025 22:47:24 +0900 Subject: [PATCH 007/176] fix(format): isolate special grouping rule to sign-aware zero-padding (#5924) --- common/src/format.rs | 43 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/common/src/format.rs b/common/src/format.rs index 819061f86b..9b2a37d450 100644 --- a/common/src/format.rs +++ b/common/src/format.rs @@ -436,7 +436,9 @@ impl FormatSpec { let sep = char::from(fg); let inter = self.get_separator_interval().try_into().unwrap(); let magnitude_len = magnitude_str.len(); - let disp_digit_cnt = if self.fill 
== Some('0'.into()) { + let disp_digit_cnt = if self.fill == Some('0'.into()) + && self.align == Some(FormatAlign::AfterSign) + { let width = self.width.unwrap_or(magnitude_len) as i32 - prefix.len() as i32; cmp::max(width, magnitude_len as i32) } else { @@ -1323,6 +1325,45 @@ mod tests { ); } + #[test] + fn test_format_int_width_and_grouping() { + // issue #5922: width + comma grouping should pad left, not inside the number + let spec = FormatSpec::parse("10,").unwrap(); + let result = spec.format_int(&BigInt::from(1234)).unwrap(); + assert_eq!(result, " 1,234"); // CPython 3.13.5 + } + + #[test] + fn test_format_int_padding_with_grouping() { + // CPython behavior: f'{1234:010,}' results in "00,001,234" + let spec1 = FormatSpec::parse("010,").unwrap(); + let result1 = spec1.format_int(&BigInt::from(1234)).unwrap(); + assert_eq!(result1, "00,001,234"); + + // CPython behavior: f'{-1234:010,}' results in "-0,001,234" + let spec2 = FormatSpec::parse("010,").unwrap(); + let result2 = spec2.format_int(&BigInt::from(-1234)).unwrap(); + assert_eq!(result2, "-0,001,234"); + + // CPython behavior: f'{-1234:=10,}' results in "- 1,234" + let spec3 = FormatSpec::parse("=10,").unwrap(); + let result3 = spec3.format_int(&BigInt::from(-1234)).unwrap(); + assert_eq!(result3, "- 1,234"); + + // CPython behavior: f'{1234:=10,}' results in " 1,234" (same as right-align for positive numbers) + let spec4 = FormatSpec::parse("=10,").unwrap(); + let result4 = spec4.format_int(&BigInt::from(1234)).unwrap(); + assert_eq!(result4, " 1,234"); + } + + #[test] + fn test_format_int_non_aftersign_zero_padding() { + // CPython behavior: f'{1234:0>10,}' results in "000001,234" + let spec = FormatSpec::parse("0>10,").unwrap(); + let result = spec.format_int(&BigInt::from(1234)).unwrap(); + assert_eq!(result, "000001,234"); + } + #[test] fn test_format_parse() { let expected = Ok(FormatString { From 089c39f741e143e057769f6916895896240acf42 Mon Sep 17 00:00:00 2001 From: Shahar Naveh 
<50263213+ShaharNaveh@users.noreply.github.com> Date: Thu, 10 Jul 2025 16:47:55 +0300 Subject: [PATCH 008/176] Update `test_string_literals.py` from 3.13.5 (#5934) --- Lib/test/test_string_literals.py | 121 ++++++++++++++++++++++++++++--- 1 file changed, 111 insertions(+), 10 deletions(-) diff --git a/Lib/test/test_string_literals.py b/Lib/test/test_string_literals.py index 537c8fc5c8..098e8d3984 100644 --- a/Lib/test/test_string_literals.py +++ b/Lib/test/test_string_literals.py @@ -111,26 +111,92 @@ def test_eval_str_invalid_escape(self): for b in range(1, 128): if b in b"""\n\r"'01234567NU\\abfnrtuvx""": continue - with self.assertWarns(DeprecationWarning): + with self.assertWarns(SyntaxWarning): self.assertEqual(eval(r"'\%c'" % b), '\\' + chr(b)) with warnings.catch_warnings(record=True) as w: - warnings.simplefilter('always', category=DeprecationWarning) + warnings.simplefilter('always', category=SyntaxWarning) eval("'''\n\\z'''") self.assertEqual(len(w), 1) + self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'") self.assertEqual(w[0].filename, '') - self.assertEqual(w[0].lineno, 1) + self.assertEqual(w[0].lineno, 2) with warnings.catch_warnings(record=True) as w: - warnings.simplefilter('error', category=DeprecationWarning) + warnings.simplefilter('error', category=SyntaxWarning) with self.assertRaises(SyntaxError) as cm: eval("'''\n\\z'''") exc = cm.exception self.assertEqual(w, []) + self.assertEqual(exc.msg, r"invalid escape sequence '\z'") self.assertEqual(exc.filename, '') - self.assertEqual(exc.lineno, 1) + self.assertEqual(exc.lineno, 2) self.assertEqual(exc.offset, 1) + # Check that the warning is raised only once if there are syntax errors + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always', category=SyntaxWarning) + with self.assertRaises(SyntaxError) as cm: + eval("'\\e' $") + exc = cm.exception + self.assertEqual(len(w), 1) + self.assertEqual(w[0].category, SyntaxWarning) + 
self.assertRegex(str(w[0].message), 'invalid escape sequence') + self.assertEqual(w[0].filename, '') + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_eval_str_invalid_octal_escape(self): + for i in range(0o400, 0o1000): + with self.assertWarns(SyntaxWarning): + self.assertEqual(eval(r"'\%o'" % i), chr(i)) + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always', category=SyntaxWarning) + eval("'''\n\\407'''") + self.assertEqual(len(w), 1) + self.assertEqual(str(w[0].message), + r"invalid octal escape sequence '\407'") + self.assertEqual(w[0].filename, '') + self.assertEqual(w[0].lineno, 2) + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('error', category=SyntaxWarning) + with self.assertRaises(SyntaxError) as cm: + eval("'''\n\\407'''") + exc = cm.exception + self.assertEqual(w, []) + self.assertEqual(exc.msg, r"invalid octal escape sequence '\407'") + self.assertEqual(exc.filename, '') + self.assertEqual(exc.lineno, 2) + self.assertEqual(exc.offset, 1) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_invalid_escape_locations_with_offset(self): + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('error', category=SyntaxWarning) + with self.assertRaises(SyntaxError) as cm: + eval("\"'''''''''''''''''''''invalid\\ Escape\"") + exc = cm.exception + self.assertEqual(w, []) + self.assertEqual(exc.msg, r"invalid escape sequence '\ '") + self.assertEqual(exc.filename, '') + self.assertEqual(exc.lineno, 1) + self.assertEqual(exc.offset, 30) + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('error', category=SyntaxWarning) + with self.assertRaises(SyntaxError) as cm: + eval("\"''Incorrect \\ logic?\"") + exc = cm.exception + self.assertEqual(w, []) + self.assertEqual(exc.msg, r"invalid escape sequence '\ '") + self.assertEqual(exc.filename, '') + self.assertEqual(exc.lineno, 1) + self.assertEqual(exc.offset, 14) + def test_eval_str_raw(self): 
self.assertEqual(eval(""" r'x' """), 'x') self.assertEqual(eval(r""" r'\x01' """), '\\' + 'x01') @@ -163,24 +229,52 @@ def test_eval_bytes_invalid_escape(self): for b in range(1, 128): if b in b"""\n\r"'01234567\\abfnrtvx""": continue - with self.assertWarns(DeprecationWarning): + with self.assertWarns(SyntaxWarning): self.assertEqual(eval(r"b'\%c'" % b), b'\\' + bytes([b])) with warnings.catch_warnings(record=True) as w: - warnings.simplefilter('always', category=DeprecationWarning) + warnings.simplefilter('always', category=SyntaxWarning) eval("b'''\n\\z'''") self.assertEqual(len(w), 1) + self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'") self.assertEqual(w[0].filename, '') - self.assertEqual(w[0].lineno, 1) + self.assertEqual(w[0].lineno, 2) with warnings.catch_warnings(record=True) as w: - warnings.simplefilter('error', category=DeprecationWarning) + warnings.simplefilter('error', category=SyntaxWarning) with self.assertRaises(SyntaxError) as cm: eval("b'''\n\\z'''") exc = cm.exception self.assertEqual(w, []) + self.assertEqual(exc.msg, r"invalid escape sequence '\z'") self.assertEqual(exc.filename, '') - self.assertEqual(exc.lineno, 1) + self.assertEqual(exc.lineno, 2) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_eval_bytes_invalid_octal_escape(self): + for i in range(0o400, 0o1000): + with self.assertWarns(SyntaxWarning): + self.assertEqual(eval(r"b'\%o'" % i), bytes([i & 0o377])) + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always', category=SyntaxWarning) + eval("b'''\n\\407'''") + self.assertEqual(len(w), 1) + self.assertEqual(str(w[0].message), + r"invalid octal escape sequence '\407'") + self.assertEqual(w[0].filename, '') + self.assertEqual(w[0].lineno, 2) + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('error', category=SyntaxWarning) + with self.assertRaises(SyntaxError) as cm: + eval("b'''\n\\407'''") + exc = cm.exception + self.assertEqual(w, []) + 
self.assertEqual(exc.msg, r"invalid octal escape sequence '\407'") + self.assertEqual(exc.filename, '') + self.assertEqual(exc.lineno, 2) def test_eval_bytes_raw(self): self.assertEqual(eval(""" br'x' """), b'x') @@ -217,6 +311,13 @@ def test_eval_str_u(self): self.assertRaises(SyntaxError, eval, """ bu'' """) self.assertRaises(SyntaxError, eval, """ ub'' """) + def test_uppercase_prefixes(self): + self.assertEqual(eval(""" B'x' """), b'x') + self.assertEqual(eval(r""" R'\x01' """), r'\x01') + self.assertEqual(eval(r""" BR'\x01' """), br'\x01') + self.assertEqual(eval(""" F'{1+1}' """), f'{1+1}') + self.assertEqual(eval(r""" U'\U0001d120' """), u'\U0001d120') + def check_encoding(self, encoding, extra=""): modname = "xx_" + encoding.replace("-", "_") fn = os.path.join(self.tmpdir, modname + ".py") From 38837e587b9cd9674e86ca6282368d07843cd541 Mon Sep 17 00:00:00 2001 From: Shahar Naveh <50263213+ShaharNaveh@users.noreply.github.com> Date: Fri, 11 Jul 2025 02:35:21 +0300 Subject: [PATCH 009/176] Make `take` issue comment to use curl (#5937) * Revert "Add missing `@` for the "take" comment command (#5933)" This reverts commit ef385a9efa3d976e528015ddb8b47d7b6f589ced. * Fix `take` --- .github/workflows/comment-commands.yml | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/.github/workflows/comment-commands.yml b/.github/workflows/comment-commands.yml index 0d209fd676..0a5d48e903 100644 --- a/.github/workflows/comment-commands.yml +++ b/.github/workflows/comment-commands.yml @@ -13,12 +13,11 @@ jobs: group: ${{ github.actor }}-issue-assign permissions: - issues: write + issues: write steps: - - run: gh issue edit "${{ env.ISSUE_NUMBER }}" --add-assignee "@${{ env.USER_LOGIN }}" - env: - ISSUE_NUMBER: ${{ github.event.issue.number }} - USER_LOGIN: ${{ github.event.comment.user.login }} - GH_TOKEN: ${{ github.token }} - GH_REPO: ${{ github.repository }} + # Using REST API and not `gh issue edit`. 
https://github.com/cli/cli/issues/6235#issuecomment-1243487651 + - run: curl \ + -H "Authorization: token ${{ github.token }}" \ + -d '{"assignees": ["${{ github.event.comment.user.login }}"]}' \ + https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/assignees From 01f15065fa033092e78db1b04f211f7f76bf46c5 Mon Sep 17 00:00:00 2001 From: Shahar Naveh <50263213+ShaharNaveh@users.noreply.github.com> Date: Fri, 11 Jul 2025 02:36:08 +0300 Subject: [PATCH 010/176] Use `raise_if_stop!` macro where possible (#5938) --- vm/src/builtins/enumerate.rs | 7 +++---- vm/src/builtins/filter.rs | 19 +++++++++---------- vm/src/builtins/map.rs | 7 +++---- 3 files changed, 15 insertions(+), 18 deletions(-) diff --git a/vm/src/builtins/enumerate.rs b/vm/src/builtins/enumerate.rs index 64cd4e774e..db3d45b248 100644 --- a/vm/src/builtins/enumerate.rs +++ b/vm/src/builtins/enumerate.rs @@ -8,6 +8,7 @@ use crate::{ convert::ToPyObject, function::OptionalArg, protocol::{PyIter, PyIterReturn}, + raise_if_stop, types::{Constructor, IterNext, Iterable, SelfIter}, }; use malachite_bigint::BigInt; @@ -73,12 +74,10 @@ impl Py { } impl SelfIter for PyEnumerate {} + impl IterNext for PyEnumerate { fn next(zelf: &Py, vm: &VirtualMachine) -> PyResult { - let next_obj = match zelf.iterator.next(vm)? 
{ - PyIterReturn::StopIteration(v) => return Ok(PyIterReturn::StopIteration(v)), - PyIterReturn::Return(obj) => obj, - }; + let next_obj = raise_if_stop!(zelf.iterator.next(vm)?); let mut counter = zelf.counter.write(); let position = counter.clone(); *counter += 1; diff --git a/vm/src/builtins/filter.rs b/vm/src/builtins/filter.rs index 5dd0162f83..661fbd0228 100644 --- a/vm/src/builtins/filter.rs +++ b/vm/src/builtins/filter.rs @@ -3,6 +3,7 @@ use crate::{ Context, Py, PyObjectRef, PyPayload, PyResult, VirtualMachine, class::PyClassImpl, protocol::{PyIter, PyIterReturn}, + raise_if_stop, types::{Constructor, IterNext, Iterable, SelfIter}, }; @@ -45,24 +46,22 @@ impl PyFilter { } impl SelfIter for PyFilter {} + impl IterNext for PyFilter { fn next(zelf: &Py, vm: &VirtualMachine) -> PyResult { let predicate = &zelf.predicate; loop { - let next_obj = match zelf.iterator.next(vm)? { - PyIterReturn::Return(obj) => obj, - PyIterReturn::StopIteration(v) => return Ok(PyIterReturn::StopIteration(v)), - }; + let next_obj = raise_if_stop!(zelf.iterator.next(vm)?); let predicate_value = if vm.is_none(predicate) { next_obj.clone() } else { - // the predicate itself can raise StopIteration which does stop the filter - // iteration - match PyIterReturn::from_pyresult(predicate.call((next_obj.clone(),), vm), vm)? { - PyIterReturn::Return(obj) => obj, - PyIterReturn::StopIteration(v) => return Ok(PyIterReturn::StopIteration(v)), - } + // the predicate itself can raise StopIteration which does stop the filter iteration + raise_if_stop!(PyIterReturn::from_pyresult( + predicate.call((next_obj.clone(),), vm), + vm + )?) }; + if predicate_value.try_to_bool(vm)? 
{ return Ok(PyIterReturn::Return(next_obj)); } diff --git a/vm/src/builtins/map.rs b/vm/src/builtins/map.rs index 004028c2cb..06a533f8bc 100644 --- a/vm/src/builtins/map.rs +++ b/vm/src/builtins/map.rs @@ -5,6 +5,7 @@ use crate::{ class::PyClassImpl, function::PosArgs, protocol::{PyIter, PyIterReturn}, + raise_if_stop, types::{Constructor, IterNext, Iterable, SelfIter}, }; @@ -53,14 +54,12 @@ impl PyMap { } impl SelfIter for PyMap {} + impl IterNext for PyMap { fn next(zelf: &Py, vm: &VirtualMachine) -> PyResult { let mut next_objs = Vec::new(); for iterator in &zelf.iterators { - let item = match iterator.next(vm)? { - PyIterReturn::Return(obj) => obj, - PyIterReturn::StopIteration(v) => return Ok(PyIterReturn::StopIteration(v)), - }; + let item = raise_if_stop!(iterator.next(vm)?); next_objs.push(item); } From 2c30e01ae2860c7052cbaa57f9ff07b83b78c3d3 Mon Sep 17 00:00:00 2001 From: Shahar Naveh <50263213+ShaharNaveh@users.noreply.github.com> Date: Fri, 11 Jul 2025 02:36:34 +0300 Subject: [PATCH 011/176] Update test_deque from 3.13.5 (#5939) --- Lib/test/test_deque.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_deque.py b/Lib/test/test_deque.py index 2b0144eb06..9f00e12edd 100644 --- a/Lib/test/test_deque.py +++ b/Lib/test/test_deque.py @@ -166,7 +166,7 @@ def test_contains(self): with self.assertRaises(RuntimeError): n in d - def test_contains_count_stop_crashes(self): + def test_contains_count_index_stop_crashes(self): class A: def __eq__(self, other): d.clear() @@ -178,6 +178,10 @@ def __eq__(self, other): with self.assertRaises(RuntimeError): _ = d.count(3) + d = deque([A()]) + with self.assertRaises(RuntimeError): + d.index(0) + def test_extend(self): d = deque('a') self.assertRaises(TypeError, d.extend, 1) From 2f94a63958115d2680d8160a080039ae7128b873 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" <69878+youknowone@users.noreply.github.com> Date: Fri, 11 Jul 2025 11:24:20 +0900 Subject: [PATCH 012/176] Add 
SymbolUsage::TypeParams (#5941) --- Lib/test/test_typing.py | 4 ---- compiler/codegen/src/compile.rs | 7 ++++++- compiler/codegen/src/symboltable.rs | 20 +++++++++++++++++--- vm/src/frame.rs | 6 +++++- 4 files changed, 28 insertions(+), 9 deletions(-) diff --git a/Lib/test/test_typing.py b/Lib/test/test_typing.py index a048c39cc9..1c74e1adac 100644 --- a/Lib/test/test_typing.py +++ b/Lib/test/test_typing.py @@ -3888,8 +3888,6 @@ def test_pep695_generic_class_with_future_annotations(self): # should not have changed as a result of the get_type_hints() calls! self.assertEqual(ann_module695.__dict__, original_globals) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_pep695_generic_class_with_future_annotations_and_local_shadowing(self): hints_for_B = get_type_hints(ann_module695.B) self.assertEqual(hints_for_B, {"x": int, "y": str, "z": bytes}) @@ -3935,8 +3933,6 @@ def test_pep_695_generic_method_with_future_annotations_name_clash_with_global_v set(ann_module695.D.generic_method_2.__type_params__) ) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_pep_695_generics_with_future_annotations_nested_in_function(self): results = ann_module695.nested() diff --git a/compiler/codegen/src/compile.rs b/compiler/codegen/src/compile.rs index ab9b469ad8..61e459500a 100644 --- a/compiler/codegen/src/compile.rs +++ b/compiler/codegen/src/compile.rs @@ -638,7 +638,11 @@ impl Compiler<'_> { cache = &mut info.cellvar_cache; NameOpType::Deref } // TODO: is this right? 
- // SymbolScope::Unknown => NameOpType::Global, + SymbolScope::TypeParams => { + // Type parameters are always cell variables + cache = &mut info.cellvar_cache; + NameOpType::Deref + } // SymbolScope::Unknown => NameOpType::Global, }; if NameUsage::Load == usage && name == "__debug__" { @@ -1630,6 +1634,7 @@ impl Compiler<'_> { let vars = match symbol.scope { SymbolScope::Free => &parent_code.freevar_cache, SymbolScope::Cell => &parent_code.cellvar_cache, + SymbolScope::TypeParams => &parent_code.cellvar_cache, _ if symbol.flags.contains(SymbolFlags::FREE_CLASS) => &parent_code.freevar_cache, x => unreachable!( "var {} in a {:?} should be free or cell but it's {:?}", diff --git a/compiler/codegen/src/symboltable.rs b/compiler/codegen/src/symboltable.rs index 2949f39a9f..f215ce38f4 100644 --- a/compiler/codegen/src/symboltable.rs +++ b/compiler/codegen/src/symboltable.rs @@ -113,6 +113,7 @@ pub enum SymbolScope { GlobalImplicit, Free, Cell, + TypeParams, } bitflags! { @@ -359,6 +360,10 @@ impl SymbolTableAnalyzer { SymbolScope::Local | SymbolScope::Cell => { // all is well } + SymbolScope::TypeParams => { + // Type parameters are always cell variables in their scope + symbol.scope = SymbolScope::Cell; + } SymbolScope::Unknown => { // Try hard to figure out what the scope of this symbol is. let scope = if symbol.is_bound() { @@ -557,6 +562,7 @@ enum SymbolUsage { AnnotationParameter, AssignedNamedExprInComprehension, Iter, + TypeParam, } struct SymbolTableBuilder<'src> { @@ -1267,6 +1273,9 @@ impl SymbolTableBuilder<'_> { } fn scan_type_params(&mut self, type_params: &TypeParams) -> SymbolTableResult { + // Register .type_params as a type parameter (automatically becomes cell variable) + self.register_name(".type_params", SymbolUsage::TypeParam, type_params.range)?; + // First register all type parameters for type_param in &type_params.type_params { match type_param { @@ -1276,7 +1285,7 @@ impl SymbolTableBuilder<'_> { range: type_var_range, .. 
}) => { - self.register_name(name.as_str(), SymbolUsage::Assigned, *type_var_range)?; + self.register_name(name.as_str(), SymbolUsage::TypeParam, *type_var_range)?; if let Some(binding) = bound { self.scan_expression(binding, ExpressionContext::Load)?; } @@ -1286,14 +1295,14 @@ impl SymbolTableBuilder<'_> { range: param_spec_range, .. }) => { - self.register_name(name, SymbolUsage::Assigned, *param_spec_range)?; + self.register_name(name, SymbolUsage::TypeParam, *param_spec_range)?; } TypeParam::TypeVarTuple(TypeParamTypeVarTuple { name, range: type_var_tuple_range, .. }) => { - self.register_name(name, SymbolUsage::Assigned, *type_var_tuple_range)?; + self.register_name(name, SymbolUsage::TypeParam, *type_var_tuple_range)?; } } } @@ -1544,6 +1553,11 @@ impl SymbolTableBuilder<'_> { SymbolUsage::Iter => { flags.insert(SymbolFlags::ITER); } + SymbolUsage::TypeParam => { + // Type parameters are always cell variables in their scope + symbol.scope = SymbolScope::Cell; + flags.insert(SymbolFlags::ASSIGNED); + } } // and even more checking diff --git a/vm/src/frame.rs b/vm/src/frame.rs index 3b69de8fd3..2bcfeebf54 100644 --- a/vm/src/frame.rs +++ b/vm/src/frame.rs @@ -596,7 +596,11 @@ impl ExecutingFrame<'_> { } bytecode::Instruction::LoadClassDeref(i) => { let i = i.get(arg) as usize; - let name = self.code.freevars[i - self.code.cellvars.len()]; + let name = if i < self.code.cellvars.len() { + self.code.cellvars[i] + } else { + self.code.freevars[i - self.code.cellvars.len()] + }; let value = self.locals.mapping().subscript(name, vm).ok(); self.push_value(match value { Some(v) => v, From 9b133b856021b6cae681a7aeb30a023c9e4cd41c Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" <69878+youknowone@users.noreply.github.com> Date: Fri, 11 Jul 2025 13:11:15 +0900 Subject: [PATCH 013/176] CodeInfo::private (#5943) --- compiler/codegen/src/compile.rs | 26 ++++++++++++++++++++------ compiler/codegen/src/ir.rs | 2 ++ 2 files changed, 22 insertions(+), 6 deletions(-) diff --git 
a/compiler/codegen/src/compile.rs b/compiler/codegen/src/compile.rs index 61e459500a..c6f01ecc82 100644 --- a/compiler/codegen/src/compile.rs +++ b/compiler/codegen/src/compile.rs @@ -78,7 +78,6 @@ struct Compiler<'src> { done_with_future_stmts: DoneWithFuture, future_annotations: bool, ctx: CompileContext, - class_name: Option, opts: CompileOpts, in_annotation: bool, } @@ -312,6 +311,7 @@ impl<'src> Compiler<'src> { first_line_number: OneIndexed::MIN, obj_name: code_name.clone(), qualname: Some(code_name), + private: None, blocks: vec![ir::Block::default()], current_block: ir::BlockIdx(0), constants: IndexSet::default(), @@ -334,7 +334,6 @@ impl<'src> Compiler<'src> { in_class: false, func: FunctionContext::NoFunction, }, - class_name: None, opts, in_annotation: false, } @@ -409,6 +408,9 @@ impl Compiler<'_> { Some(self.qualified_path.join(".")) }; + // Get the private name from current scope if exists + let private = self.code_stack.last().and_then(|info| info.private.clone()); + let info = ir::CodeInfo { flags, posonlyarg_count, @@ -418,6 +420,7 @@ impl Compiler<'_> { first_line_number, obj_name, qualname, + private, blocks: vec![ir::Block::default()], current_block: ir::BlockIdx(0), @@ -587,7 +590,12 @@ impl Compiler<'_> { } fn mangle<'a>(&self, name: &'a str) -> Cow<'a, str> { - symboltable::mangle_name(self.class_name.as_deref(), name) + // Use u_private from current code unit for name mangling + let private = self + .code_stack + .last() + .and_then(|info| info.private.as_deref()); + symboltable::mangle_name(private, name) } fn check_forbidden_name(&mut self, name: &str, usage: NameUsage) -> CompileResult<()> { @@ -1709,8 +1717,6 @@ impl Compiler<'_> { loop_data: None, }; - let prev_class_name = self.class_name.replace(name.to_owned()); - // Check if the class is declared global let symbol_table = self.symbol_table_stack.last().unwrap(); let symbol = unwrap_internal( @@ -1729,8 +1735,14 @@ impl Compiler<'_> { // If there are type params, we need to push a 
special symbol table just for them if let Some(type_params) = type_params { self.push_symbol_table(); + // Save current private name to restore later + let saved_private = self.code_stack.last().and_then(|info| info.private.clone()); // Compile type parameters and store as .type_params self.compile_type_params(type_params)?; + // Restore private name after type param scope + if let Some(private) = saved_private { + self.code_stack.last_mut().unwrap().private = Some(private); + } let dot_type_params = self.name(".type_params"); emit!(self, Instruction::StoreLocal(dot_type_params)); } @@ -1740,6 +1752,9 @@ impl Compiler<'_> { // Update the qualname in the current code info self.code_stack.last_mut().unwrap().qualname = Some(qualified_name.clone()); + // For class scopes, set u_private to the class name for name mangling + self.code_stack.last_mut().unwrap().private = Some(name.to_owned()); + let (doc_str, body) = split_doc(body, &self.opts); let dunder_name = self.name("__name__"); @@ -1793,7 +1808,6 @@ impl Compiler<'_> { let code = self.pop_code_object(); - self.class_name = prev_class_name; self.qualified_path.pop(); self.qualified_path.append(global_path_prefix.as_mut()); self.ctx = prev_ctx; diff --git a/compiler/codegen/src/ir.rs b/compiler/codegen/src/ir.rs index 5e115d9dee..852051777e 100644 --- a/compiler/codegen/src/ir.rs +++ b/compiler/codegen/src/ir.rs @@ -74,6 +74,7 @@ pub struct CodeInfo { pub first_line_number: OneIndexed, pub obj_name: String, // Name of the object that created this code object pub qualname: Option, // Qualified name of the object + pub private: Option, // For private name mangling, mostly for class pub blocks: Vec, pub current_block: BlockIdx, @@ -101,6 +102,7 @@ impl CodeInfo { first_line_number, obj_name, qualname, + private: _, // private is only used during compilation mut blocks, current_block: _, From 8b6c78c884caa7fe8ef49e672fd15aa38a54e17f Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" 
<69878+youknowone@users.noreply.github.com> Date: Fri, 11 Jul 2025 13:35:52 +0900 Subject: [PATCH 014/176] SymbolTableType::Lambda (#5942) --- compiler/codegen/src/symboltable.rs | 6 ++++-- compiler/core/src/bytecode.rs | 2 +- vm/src/frame.rs | 6 ++++++ 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/compiler/codegen/src/symboltable.rs b/compiler/codegen/src/symboltable.rs index f215ce38f4..66dbff326a 100644 --- a/compiler/codegen/src/symboltable.rs +++ b/compiler/codegen/src/symboltable.rs @@ -80,6 +80,7 @@ pub enum SymbolTableType { Module, Class, Function, + Lambda, Comprehension, TypeParams, } @@ -90,6 +91,7 @@ impl fmt::Display for SymbolTableType { Self::Module => write!(f, "module"), Self::Class => write!(f, "class"), Self::Function => write!(f, "function"), + Self::Lambda => write!(f, "lambda"), Self::Comprehension => write!(f, "comprehension"), Self::TypeParams => write!(f, "type parameter"), // TODO missing types from the C implementation @@ -493,7 +495,7 @@ impl SymbolTableAnalyzer { location: None, }); } - SymbolTableType::Function => { + SymbolTableType::Function | SymbolTableType::Lambda => { if let Some(parent_symbol) = symbols.get_mut(&symbol.name) { if let SymbolScope::Unknown = parent_symbol.scope { // this information is new, as the assignment is done in inner scope @@ -1140,7 +1142,7 @@ impl SymbolTableBuilder<'_> { } else { self.enter_scope( "lambda", - SymbolTableType::Function, + SymbolTableType::Lambda, self.line_index_start(expression.range()), ); } diff --git a/compiler/core/src/bytecode.rs b/compiler/core/src/bytecode.rs index 3fe9356004..a27174e859 100644 --- a/compiler/core/src/bytecode.rs +++ b/compiler/core/src/bytecode.rs @@ -414,7 +414,7 @@ op_arg_enum!( // PrepReraiseS tar = 1, // TypeVarWithBound = 2, // TypeVarWithConstraints = 3, - // SetFunctionTypeParams = 4, + SetFunctionTypeParams = 4, /// Set default value for type parameter (PEP 695) SetTypeparamDefault = 5, } diff --git a/vm/src/frame.rs b/vm/src/frame.rs 
index 2bcfeebf54..6c9181c2c1 100644 --- a/vm/src/frame.rs +++ b/vm/src/frame.rs @@ -2286,6 +2286,12 @@ impl ExecutingFrame<'_> { bytecode::IntrinsicFunction2::SetTypeparamDefault => { crate::stdlib::typing::set_typeparam_default(arg1, arg2, vm) } + bytecode::IntrinsicFunction2::SetFunctionTypeParams => { + // arg1 is the function, arg2 is the type params tuple + // Set __type_params__ attribute on the function + arg1.set_attr("__type_params__", arg2, vm)?; + Ok(arg1) + } } } From 0ae6b4575c9b31b29db487c5fcef4965b1b45dea Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" <69878+youknowone@users.noreply.github.com> Date: Fri, 11 Jul 2025 16:16:01 +0900 Subject: [PATCH 015/176] typing TypeAlias (#5945) --- Lib/test/test_typing.py | 2 - compiler/codegen/src/compile.rs | 62 +++++++++++----- compiler/core/src/bytecode.rs | 46 ++---------- vm/src/frame.rs | 126 ++++++++++++++++---------------- vm/src/stdlib/typing.rs | 21 ++++++ 5 files changed, 134 insertions(+), 123 deletions(-) diff --git a/Lib/test/test_typing.py b/Lib/test/test_typing.py index 1c74e1adac..d0fe1b0188 100644 --- a/Lib/test/test_typing.py +++ b/Lib/test/test_typing.py @@ -6649,8 +6649,6 @@ def manager(): self.assertIsInstance(cm, typing.ContextManager) self.assertNotIsInstance(42, typing.ContextManager) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_contextmanager_type_params(self): cm1 = typing.ContextManager[int] self.assertEqual(get_args(cm1), (int, bool | None)) diff --git a/compiler/codegen/src/compile.rs b/compiler/codegen/src/compile.rs index c6f01ecc82..d3d412e9c5 100644 --- a/compiler/codegen/src/compile.rs +++ b/compiler/codegen/src/compile.rs @@ -1067,33 +1067,41 @@ impl Compiler<'_> { // For PEP 695 syntax, we need to compile type_params first // so that they're available when compiling the value expression + // Push name first + self.emit_load_const(ConstantData::Str { + value: name_string.clone().into(), + }); + if let Some(type_params) = type_params { self.push_symbol_table(); - // 
Compile type params first to define T1, T2, etc. + // Compile type params and push to stack self.compile_type_params(type_params)?; - // Stack now has type_params tuple at top + // Stack now has [name, type_params_tuple] // Compile value expression (can now see T1, T2) self.compile_expression(value)?; - // Stack: [type_params_tuple, value] - - // We need [value, type_params_tuple] for TypeAlias instruction - emit!(self, Instruction::Rotate2); + // Stack: [name, type_params_tuple, value] self.pop_symbol_table(); } else { - // No type params - push value first, then None (not empty tuple) - self.compile_expression(value)?; // Push None for type_params (matching CPython) self.emit_load_const(ConstantData::None); + // Stack: [name, None] + + // Compile value expression + self.compile_expression(value)?; + // Stack: [name, None, value] } - // Push name last - self.emit_load_const(ConstantData::Str { - value: name_string.clone().into(), - }); - emit!(self, Instruction::TypeAlias); + // Build tuple of 3 elements and call intrinsic + emit!(self, Instruction::BuildTuple { size: 3 }); + emit!( + self, + Instruction::CallIntrinsic1 { + func: bytecode::IntrinsicFunction1::TypeAlias + } + ); self.store_name(&name_string)?; } Stmt::IpyEscapeCommand(_) => todo!(), @@ -1246,12 +1254,22 @@ impl Compiler<'_> { self.emit_load_const(ConstantData::Str { value: name.as_str().into(), }); - emit!(self, Instruction::TypeVarWithBound); + emit!( + self, + Instruction::CallIntrinsic2 { + func: bytecode::IntrinsicFunction2::TypeVarWithBound + } + ); } else { self.emit_load_const(ConstantData::Str { value: name.as_str().into(), }); - emit!(self, Instruction::TypeVar); + emit!( + self, + Instruction::CallIntrinsic1 { + func: bytecode::IntrinsicFunction1::TypeVar + } + ); } // Handle default value if present (PEP 695) @@ -1274,7 +1292,12 @@ impl Compiler<'_> { self.emit_load_const(ConstantData::Str { value: name.as_str().into(), }); - emit!(self, Instruction::ParamSpec); + emit!( + self, + 
Instruction::CallIntrinsic1 { + func: bytecode::IntrinsicFunction1::ParamSpec + } + ); // Handle default value if present (PEP 695) if let Some(default_expr) = default { @@ -1296,7 +1319,12 @@ impl Compiler<'_> { self.emit_load_const(ConstantData::Str { value: name.as_str().into(), }); - emit!(self, Instruction::TypeVarTuple); + emit!( + self, + Instruction::CallIntrinsic1 { + func: bytecode::IntrinsicFunction1::TypeVarTuple + } + ); // Handle default value if present (PEP 695) if let Some(default_expr) = default { diff --git a/compiler/core/src/bytecode.rs b/compiler/core/src/bytecode.rs index a27174e859..cef332bfbc 100644 --- a/compiler/core/src/bytecode.rs +++ b/compiler/core/src/bytecode.rs @@ -382,27 +382,13 @@ op_arg_enum!( #[derive(Copy, Clone, Debug, PartialEq, Eq)] #[repr(u8)] pub enum IntrinsicFunction1 { - /// Import * special case - // ImportStar = 0, - /// Set stop iteration value - // StopAsyncIteration = 1, - /// Unary operators - // UnaryPositive = 2, - // UnaryNegative = 3, - // UnaryNot = 4, - // UnaryInvert = 5, - /// Exit init subclass - // ExitInitCheck = 6, - /// Create a new list from an iterator - // ListToTupleForCall = 7, /// Type parameter related - // TypeVar = 8, - // TypeVarTuple = 9, - // ParamSpec = 10, + TypeVar = 7, + ParamSpec = 8, + TypeVarTuple = 9, /// Generic subscript for PEP 695 SubscriptGeneric = 10, - // TypeAlias = 12, - // TypeParams = 13, + TypeAlias = 11, } ); @@ -412,8 +398,8 @@ op_arg_enum!( #[repr(u8)] pub enum IntrinsicFunction2 { // PrepReraiseS tar = 1, - // TypeVarWithBound = 2, - // TypeVarWithConstraints = 3, + TypeVarWithBound = 2, + TypeVarWithConstraint = 3, SetFunctionTypeParams = 4, /// Set default value for type parameter (PEP 695) SetTypeparamDefault = 5, @@ -668,16 +654,10 @@ pub enum Instruction { MatchKeys, MatchClass(Arg), ExtendedArg, - TypeVar, - TypeVarWithBound, - TypeVarWithConstraint, - TypeAlias, - TypeVarTuple, - ParamSpec, // If you add a new instruction here, be sure to keep 
LAST_INSTRUCTION updated } // This must be kept up to date to avoid marshaling errors -const LAST_INSTRUCTION: Instruction = Instruction::ParamSpec; +const LAST_INSTRUCTION: Instruction = Instruction::ExtendedArg; const _: () = assert!(mem::size_of::() == 1); impl From for u8 { @@ -1380,12 +1360,6 @@ impl Instruction { MatchKeys => -1, MatchClass(_) => -2, ExtendedArg => 0, - TypeVar => 0, - TypeVarWithBound => -1, - TypeVarWithConstraint => -1, - TypeAlias => -2, - ParamSpec => 0, - TypeVarTuple => 0, } } @@ -1565,12 +1539,6 @@ impl Instruction { MatchKeys => w!(MatchKeys), MatchClass(arg) => w!(MatchClass, arg), ExtendedArg => w!(ExtendedArg, Arg::::marker()), - TypeVar => w!(TypeVar), - TypeVarWithBound => w!(TypeVarWithBound), - TypeVarWithConstraint => w!(TypeVarWithConstraint), - TypeAlias => w!(TypeAlias), - ParamSpec => w!(ParamSpec), - TypeVarTuple => w!(TypeVarTuple), } } } diff --git a/vm/src/frame.rs b/vm/src/frame.rs index 6c9181c2c1..7c935b814b 100644 --- a/vm/src/frame.rs +++ b/vm/src/frame.rs @@ -1255,71 +1255,6 @@ impl ExecutingFrame<'_> { *extend_arg = true; Ok(None) } - bytecode::Instruction::TypeVar => { - let type_name = self.pop_value(); - let type_var: PyObjectRef = - typing::TypeVar::new(vm, type_name.clone(), vm.ctx.none(), vm.ctx.none()) - .into_ref(&vm.ctx) - .into(); - self.push_value(type_var); - Ok(None) - } - bytecode::Instruction::TypeVarWithBound => { - let type_name = self.pop_value(); - let bound = self.pop_value(); - let type_var: PyObjectRef = - typing::TypeVar::new(vm, type_name.clone(), bound, vm.ctx.none()) - .into_ref(&vm.ctx) - .into(); - self.push_value(type_var); - Ok(None) - } - bytecode::Instruction::TypeVarWithConstraint => { - let type_name = self.pop_value(); - let constraint = self.pop_value(); - let type_var: PyObjectRef = - typing::TypeVar::new(vm, type_name.clone(), vm.ctx.none(), constraint) - .into_ref(&vm.ctx) - .into(); - self.push_value(type_var); - Ok(None) - } - bytecode::Instruction::TypeAlias => { - let 
name = self.pop_value(); - let type_params_obj = self.pop_value(); - - // CPython allows None or tuple for type_params - let type_params: PyTupleRef = if vm.is_none(&type_params_obj) { - // If None, use empty tuple (matching CPython's behavior) - vm.ctx.empty_tuple.clone() - } else { - type_params_obj - .downcast() - .map_err(|_| vm.new_type_error("Type params must be a tuple."))? - }; - - let value = self.pop_value(); - let type_alias = typing::TypeAliasType::new(name, type_params, value); - self.push_value(type_alias.into_ref(&vm.ctx).into()); - Ok(None) - } - bytecode::Instruction::ParamSpec => { - let param_spec_name = self.pop_value(); - let param_spec: PyObjectRef = typing::ParamSpec::new(param_spec_name.clone(), vm) - .into_ref(&vm.ctx) - .into(); - self.push_value(param_spec); - Ok(None) - } - bytecode::Instruction::TypeVarTuple => { - let type_var_tuple_name = self.pop_value(); - let type_var_tuple: PyObjectRef = - typing::TypeVarTuple::new(type_var_tuple_name.clone(), vm) - .into_ref(&vm.ctx) - .into(); - self.push_value(type_var_tuple); - Ok(None) - } bytecode::Instruction::MatchMapping => { // Pop the subject from stack let subject = self.pop_value(); @@ -2272,6 +2207,53 @@ impl ExecutingFrame<'_> { // Used for PEP 695: Generic[*type_params] crate::builtins::genericalias::subscript_generic(arg, vm) } + bytecode::IntrinsicFunction1::TypeVar => { + let type_var: PyObjectRef = + typing::TypeVar::new(vm, arg.clone(), vm.ctx.none(), vm.ctx.none()) + .into_ref(&vm.ctx) + .into(); + Ok(type_var) + } + bytecode::IntrinsicFunction1::ParamSpec => { + let param_spec: PyObjectRef = typing::ParamSpec::new(arg.clone(), vm) + .into_ref(&vm.ctx) + .into(); + Ok(param_spec) + } + bytecode::IntrinsicFunction1::TypeVarTuple => { + let type_var_tuple: PyObjectRef = typing::TypeVarTuple::new(arg.clone(), vm) + .into_ref(&vm.ctx) + .into(); + Ok(type_var_tuple) + } + bytecode::IntrinsicFunction1::TypeAlias => { + // TypeAlias receives a tuple of (name, type_params, value) + 
let tuple: PyTupleRef = arg + .downcast() + .map_err(|_| vm.new_type_error("TypeAlias expects a tuple argument"))?; + + if tuple.len() != 3 { + return Err(vm.new_type_error(format!( + "TypeAlias expects exactly 3 arguments, got {}", + tuple.len() + ))); + } + + let name = tuple.as_slice()[0].clone(); + let type_params_obj = tuple.as_slice()[1].clone(); + let value = tuple.as_slice()[2].clone(); + + let type_params: PyTupleRef = if vm.is_none(&type_params_obj) { + vm.ctx.empty_tuple.clone() + } else { + type_params_obj + .downcast() + .map_err(|_| vm.new_type_error("Type params must be a tuple."))? + }; + + let type_alias = typing::TypeAliasType::new(name, type_params, value); + Ok(type_alias.into_ref(&vm.ctx).into()) + } } } @@ -2292,6 +2274,20 @@ impl ExecutingFrame<'_> { arg1.set_attr("__type_params__", arg2, vm)?; Ok(arg1) } + bytecode::IntrinsicFunction2::TypeVarWithBound => { + let type_var: PyObjectRef = + typing::TypeVar::new(vm, arg1.clone(), arg2, vm.ctx.none()) + .into_ref(&vm.ctx) + .into(); + Ok(type_var) + } + bytecode::IntrinsicFunction2::TypeVarWithConstraint => { + let type_var: PyObjectRef = + typing::TypeVar::new(vm, arg1.clone(), vm.ctx.none(), arg2) + .into_ref(&vm.ctx) + .into(); + Ok(type_var) + } } } diff --git a/vm/src/stdlib/typing.rs b/vm/src/stdlib/typing.rs index 331206b214..77feee44c0 100644 --- a/vm/src/stdlib/typing.rs +++ b/vm/src/stdlib/typing.rs @@ -113,6 +113,27 @@ pub(crate) mod decl { value, } } + + #[pygetset] + fn __name__(&self) -> PyObjectRef { + self.name.clone() + } + + #[pygetset] + fn __value__(&self) -> PyObjectRef { + self.value.clone() + } + + #[pygetset] + fn __type_params__(&self) -> PyTupleRef { + self.type_params.clone() + } + + #[pymethod(name = "__repr__")] + fn repr(&self, vm: &VirtualMachine) -> PyResult { + let name = self.name.str(vm)?; + Ok(name.as_str().to_owned()) + } } // impl AsMapping for Generic { From 392f9c26c5bc8f604fb3a7123720389b7f4ca65f Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" 
<69878+youknowone@users.noreply.github.com> Date: Fri, 11 Jul 2025 17:25:57 +0900 Subject: [PATCH 016/176] Instruction::Resume (#5944) * ImportStar * Instruction::Resume --- compiler/codegen/src/compile.rs | 69 +++++++++++- ...pile__tests__nested_double_async_with.snap | 100 +++++++++--------- compiler/core/src/bytecode.rs | 24 ++++- jit/src/instructions.rs | 4 + vm/src/frame.rs | 22 +++- 5 files changed, 161 insertions(+), 58 deletions(-) diff --git a/compiler/codegen/src/compile.rs b/compiler/codegen/src/compile.rs index d3d412e9c5..98b70dfbb2 100644 --- a/compiler/codegen/src/compile.rs +++ b/compiler/codegen/src/compile.rs @@ -784,7 +784,12 @@ impl Compiler<'_> { if import_star { // from .... import * - emit!(self, Instruction::ImportStar); + emit!( + self, + Instruction::CallIntrinsic1 { + func: bytecode::IntrinsicFunction1::ImportStar + } + ); } else { // from mod import a, b as c @@ -1556,6 +1561,14 @@ impl Compiler<'_> { .constants .insert_full(ConstantData::None); + // Emit RESUME instruction at function start + emit!( + self, + Instruction::Resume { + arg: bytecode::ResumeType::AtFuncStart as u32 + } + ); + self.compile_statements(body)?; // Emit None at end: @@ -1971,6 +1984,12 @@ impl Compiler<'_> { emit!(self, Instruction::GetAwaitable); self.emit_load_const(ConstantData::None); emit!(self, Instruction::YieldFrom); + emit!( + self, + Instruction::Resume { + arg: bytecode::ResumeType::AfterAwait as u32 + } + ); emit!(self, Instruction::SetupAsyncWith { end: final_block }); } else { emit!(self, Instruction::SetupWith { end: final_block }); @@ -2012,6 +2031,12 @@ impl Compiler<'_> { emit!(self, Instruction::GetAwaitable); self.emit_load_const(ConstantData::None); emit!(self, Instruction::YieldFrom); + emit!( + self, + Instruction::Resume { + arg: bytecode::ResumeType::AfterAwait as u32 + } + ); } emit!(self, Instruction::WithCleanupFinish); @@ -2050,6 +2075,12 @@ impl Compiler<'_> { emit!(self, Instruction::GetANext); 
self.emit_load_const(ConstantData::None); emit!(self, Instruction::YieldFrom); + emit!( + self, + Instruction::Resume { + arg: bytecode::ResumeType::AfterAwait as u32 + } + ); self.compile_store(target)?; emit!(self, Instruction::PopBlock); } else { @@ -3521,6 +3552,12 @@ impl Compiler<'_> { Option::None => self.emit_load_const(ConstantData::None), }; emit!(self, Instruction::YieldValue); + emit!( + self, + Instruction::Resume { + arg: bytecode::ResumeType::AfterYield as u32 + } + ); } Expr::Await(ExprAwait { value, .. }) => { if self.ctx.func != FunctionContext::AsyncFunction { @@ -3530,6 +3567,12 @@ impl Compiler<'_> { emit!(self, Instruction::GetAwaitable); self.emit_load_const(ConstantData::None); emit!(self, Instruction::YieldFrom); + emit!( + self, + Instruction::Resume { + arg: bytecode::ResumeType::AfterAwait as u32 + } + ); } Expr::YieldFrom(ExprYieldFrom { value, .. }) => { match self.ctx.func { @@ -3546,6 +3589,12 @@ impl Compiler<'_> { emit!(self, Instruction::GetIter); self.emit_load_const(ConstantData::None); emit!(self, Instruction::YieldFrom); + emit!( + self, + Instruction::Resume { + arg: bytecode::ResumeType::AfterYieldFrom as u32 + } + ); } Expr::Name(ExprName { id, .. 
}) => self.load_name(id.as_str())?, Expr::Lambda(ExprLambda { @@ -3672,6 +3721,12 @@ impl Compiler<'_> { compiler.compile_comprehension_element(elt)?; compiler.mark_generator(); emit!(compiler, Instruction::YieldValue); + emit!( + compiler, + Instruction::Resume { + arg: bytecode::ResumeType::AfterYield as u32 + } + ); emit!(compiler, Instruction::Pop); Ok(()) @@ -4067,6 +4122,12 @@ impl Compiler<'_> { emit!(self, Instruction::GetANext); self.emit_load_const(ConstantData::None); emit!(self, Instruction::YieldFrom); + emit!( + self, + Instruction::Resume { + arg: bytecode::ResumeType::AfterAwait as u32 + } + ); self.compile_store(&generator.target)?; emit!(self, Instruction::PopBlock); } else { @@ -4145,6 +4206,12 @@ impl Compiler<'_> { emit!(self, Instruction::GetAwaitable); self.emit_load_const(ConstantData::None); emit!(self, Instruction::YieldFrom); + emit!( + self, + Instruction::Resume { + arg: bytecode::ResumeType::AfterAwait as u32 + } + ); } Ok(()) diff --git a/compiler/codegen/src/snapshots/rustpython_codegen__compile__tests__nested_double_async_with.snap b/compiler/codegen/src/snapshots/rustpython_codegen__compile__tests__nested_double_async_with.snap index 36b00c567d..9165a6cfbf 100644 --- a/compiler/codegen/src/snapshots/rustpython_codegen__compile__tests__nested_double_async_with.snap +++ b/compiler/codegen/src/snapshots/rustpython_codegen__compile__tests__nested_double_async_with.snap @@ -11,7 +11,7 @@ expression: "compile_exec(\"\\\nfor stop_exc in (StopIteration('spam'), StopAsyn 6 CallFunctionPositional(1) 7 BuildTuple (2) 8 GetIter - >> 9 ForIter (71) + >> 9 ForIter (73) 10 StoreLocal (2, stop_exc) 2 11 LoadNameAny (3, self) @@ -21,10 +21,10 @@ expression: "compile_exec(\"\\\nfor stop_exc in (StopIteration('spam'), StopAsyn 15 CallFunctionPositional(1) 16 LoadConst (("type")) 17 CallMethodKeyword (1) - 18 SetupWith (68) + 18 SetupWith (70) 19 Pop - 3 20 SetupExcept (40) + 3 20 SetupExcept (42) 4 21 LoadNameAny (6, egg) 22 CallFunctionPositional(0) 
@@ -32,55 +32,57 @@ expression: "compile_exec(\"\\\nfor stop_exc in (StopIteration('spam'), StopAsyn 24 GetAwaitable 25 LoadConst (None) 26 YieldFrom - 27 SetupAsyncWith (33) - 28 Pop + 27 Resume (3) + 28 SetupAsyncWith (34) + 29 Pop - 5 29 LoadNameAny (2, stop_exc) - 30 Raise (Raise) + 5 30 LoadNameAny (2, stop_exc) + 31 Raise (Raise) - 4 31 PopBlock - 32 EnterFinally - >> 33 WithCleanupStart - 34 GetAwaitable - 35 LoadConst (None) - 36 YieldFrom - 37 WithCleanupFinish - 38 PopBlock - 39 Jump (57) - >> 40 Duplicate + 4 32 PopBlock + 33 EnterFinally + >> 34 WithCleanupStart + 35 GetAwaitable + 36 LoadConst (None) + 37 YieldFrom + 38 Resume (3) + 39 WithCleanupFinish + 40 PopBlock + 41 Jump (59) + >> 42 Duplicate - 6 41 LoadNameAny (7, Exception) - 42 TestOperation (ExceptionMatch) - 43 JumpIfFalse (56) - 44 StoreLocal (8, ex) + 6 43 LoadNameAny (7, Exception) + 44 TestOperation (ExceptionMatch) + 45 JumpIfFalse (58) + 46 StoreLocal (8, ex) - 7 45 LoadNameAny (3, self) - 46 LoadMethod (9, assertIs) - 47 LoadNameAny (8, ex) - 48 LoadNameAny (2, stop_exc) - 49 CallMethodPositional (2) - 50 Pop - 51 PopException - 52 LoadConst (None) - 53 StoreLocal (8, ex) - 54 DeleteLocal (8, ex) - 55 Jump (66) - >> 56 Raise (Reraise) + 7 47 LoadNameAny (3, self) + 48 LoadMethod (9, assertIs) + 49 LoadNameAny (8, ex) + 50 LoadNameAny (2, stop_exc) + 51 CallMethodPositional (2) + 52 Pop + 53 PopException + 54 LoadConst (None) + 55 StoreLocal (8, ex) + 56 DeleteLocal (8, ex) + 57 Jump (68) + >> 58 Raise (Reraise) - 9 >> 57 LoadNameAny (3, self) - 58 LoadMethod (10, fail) - 59 LoadConst ("") - 60 LoadNameAny (2, stop_exc) - 61 FormatValue (None) - 62 LoadConst (" was suppressed") - 63 BuildString (2) - 64 CallMethodPositional (1) - 65 Pop + 9 >> 59 LoadNameAny (3, self) + 60 LoadMethod (10, fail) + 61 LoadConst ("") + 62 LoadNameAny (2, stop_exc) + 63 FormatValue (None) + 64 LoadConst (" was suppressed") + 65 BuildString (2) + 66 CallMethodPositional (1) + 67 Pop - 2 >> 66 PopBlock - 67 
EnterFinally - >> 68 WithCleanupStart - 69 WithCleanupFinish - 70 Jump (9) - >> 71 PopBlock - 72 ReturnConst (None) + 2 >> 68 PopBlock + 69 EnterFinally + >> 70 WithCleanupStart + 71 WithCleanupFinish + 72 Jump (9) + >> 73 PopBlock + 74 ReturnConst (None) diff --git a/compiler/core/src/bytecode.rs b/compiler/core/src/bytecode.rs index cef332bfbc..3e74fe6273 100644 --- a/compiler/core/src/bytecode.rs +++ b/compiler/core/src/bytecode.rs @@ -24,6 +24,16 @@ pub enum ConversionFlag { Repr = b'r' as i8, } +/// Resume type for the RESUME instruction +#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq)] +#[repr(u32)] +pub enum ResumeType { + AtFuncStart = 0, + AfterYield = 1, + AfterYieldFrom = 2, + AfterAwait = 3, +} + pub trait Constant: Sized { type Name: AsRef; @@ -382,6 +392,8 @@ op_arg_enum!( #[derive(Copy, Clone, Debug, PartialEq, Eq)] #[repr(u8)] pub enum IntrinsicFunction1 { + /// Import * operation + ImportStar = 2, /// Type parameter related TypeVar = 7, ParamSpec = 8, @@ -419,8 +431,6 @@ pub enum Instruction { }, /// Importing without name ImportNameless, - /// Import * - ImportStar, /// from ... import ... ImportFrom { idx: Arg, @@ -549,6 +559,12 @@ pub enum Instruction { }, YieldValue, YieldFrom, + + /// Resume execution (e.g., at function start, after yield, etc.) + Resume { + arg: Arg, + }, + SetupAnnotation, SetupLoop, @@ -1240,7 +1256,6 @@ impl Instruction { match self { Nop => 0, ImportName { .. } | ImportNameless => -1, - ImportStar => -1, ImportFrom { .. } => 1, LoadFast(_) | LoadNameAny(_) | LoadGlobal(_) | LoadDeref(_) | LoadClassDeref(_) => 1, StoreFast(_) | StoreLocal(_) | StoreGlobal(_) | StoreDeref(_) => -1, @@ -1305,6 +1320,7 @@ impl Instruction { } ReturnValue => -1, ReturnConst { .. } => 0, + Resume { .. } => 0, YieldValue => 0, YieldFrom => -1, SetupAnnotation | SetupLoop | SetupFinally { .. 
} | EnterFinally | EndFinally => 0, @@ -1433,7 +1449,6 @@ impl Instruction { Nop => w!(Nop), ImportName { idx } => w!(ImportName, name = idx), ImportNameless => w!(ImportNameless), - ImportStar => w!(ImportStar), ImportFrom { idx } => w!(ImportFrom, name = idx), LoadFast(idx) => w!(LoadFast, varname = idx), LoadNameAny(idx) => w!(LoadNameAny, name = idx), @@ -1493,6 +1508,7 @@ impl Instruction { ForIter { target } => w!(ForIter, target), ReturnValue => w!(ReturnValue), ReturnConst { idx } => fmt_const("ReturnConst", arg, f, idx), + Resume { arg } => w!(Resume, arg), YieldValue => w!(YieldValue), YieldFrom => w!(YieldFrom), SetupAnnotation => w!(SetupAnnotation), diff --git a/jit/src/instructions.rs b/jit/src/instructions.rs index 9ec0a4385e..5f0123d22b 100644 --- a/jit/src/instructions.rs +++ b/jit/src/instructions.rs @@ -612,6 +612,10 @@ impl<'a, 'b> FunctionCompiler<'a, 'b> { self.stack.pop(); Ok(()) } + Instruction::Resume { arg: _resume_arg } => { + // TODO: Implement the resume instruction + Ok(()) + } _ => Err(JitCompileError::NotSupported), } } diff --git a/vm/src/frame.rs b/vm/src/frame.rs index 7c935b814b..a3e31c5c2b 100644 --- a/vm/src/frame.rs +++ b/vm/src/frame.rs @@ -541,10 +541,6 @@ impl ExecutingFrame<'_> { self.import(vm, None)?; Ok(None) } - bytecode::Instruction::ImportStar => { - self.import_star(vm)?; - Ok(None) - } bytecode::Instruction::ImportFrom { idx } => { let obj = self.import_from(vm, idx.get(arg))?; self.push_value(obj); @@ -893,6 +889,18 @@ impl ExecutingFrame<'_> { Ok(Some(ExecutionResult::Yield(value))) } bytecode::Instruction::YieldFrom => self.execute_yield_from(vm), + bytecode::Instruction::Resume { arg: resume_arg } => { + // Resume execution after yield, await, or at function start + // In CPython, this checks instrumentation and eval breaker + // For now, we just check for signals/interrupts + let _resume_type = resume_arg.get(arg); + + // Check for interrupts if not resuming from yield_from + // if resume_type < 
bytecode::ResumeType::AfterYieldFrom as u32 { + // vm.check_signals()?; + // } + Ok(None) + } bytecode::Instruction::SetupAnnotation => self.setup_annotations(vm), bytecode::Instruction::SetupLoop => { self.push_block(BlockType::Loop); @@ -2203,6 +2211,12 @@ impl ExecutingFrame<'_> { vm: &VirtualMachine, ) -> PyResult { match func { + bytecode::IntrinsicFunction1::ImportStar => { + // arg is the module object + self.push_value(arg); // Push module back on stack for import_star + self.import_star(vm)?; + Ok(vm.ctx.none()) + } bytecode::IntrinsicFunction1::SubscriptGeneric => { // Used for PEP 695: Generic[*type_params] crate::builtins::genericalias::subscript_generic(arg, vm) From 50c241fd71982039159fd2508c228b8e59265a18 Mon Sep 17 00:00:00 2001 From: Shahar Naveh <50263213+ShaharNaveh@users.noreply.github.com> Date: Fri, 11 Jul 2025 11:44:46 +0300 Subject: [PATCH 017/176] Fix yaml error in `take` issue command (#5946) --- .github/workflows/comment-commands.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/comment-commands.yml b/.github/workflows/comment-commands.yml index 0a5d48e903..d1a457c73e 100644 --- a/.github/workflows/comment-commands.yml +++ b/.github/workflows/comment-commands.yml @@ -17,7 +17,5 @@ jobs: steps: # Using REST API and not `gh issue edit`. 
https://github.com/cli/cli/issues/6235#issuecomment-1243487651 - - run: curl \ - -H "Authorization: token ${{ github.token }}" \ - -d '{"assignees": ["${{ github.event.comment.user.login }}"]}' \ - https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/assignees + - run: | + curl -H "Authorization: token ${{ github.token }}" -d '{"assignees": ["${{ github.event.comment.user.login }}"]}' https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/assignees From 59c7fcbb9862da38281a6605c5dda177f7a2552c Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" <69878+youknowone@users.noreply.github.com> Date: Fri, 11 Jul 2025 18:21:51 +0900 Subject: [PATCH 018/176] compiler set_qualname (#5930) * set_qualname * remove qualified_path --- compiler/codegen/src/compile.rs | 153 ++++++++++++++++++++++---------- 1 file changed, 105 insertions(+), 48 deletions(-) diff --git a/compiler/codegen/src/compile.rs b/compiler/codegen/src/compile.rs index 98b70dfbb2..5aef2020d0 100644 --- a/compiler/codegen/src/compile.rs +++ b/compiler/codegen/src/compile.rs @@ -74,7 +74,6 @@ struct Compiler<'src> { source_code: SourceCode<'src>, // current_source_location: SourceLocation, current_source_range: TextRange, - qualified_path: Vec, done_with_future_stmts: DoneWithFuture, future_annotations: bool, ctx: CompileContext, @@ -326,7 +325,6 @@ impl<'src> Compiler<'src> { source_code, // current_source_location: SourceLocation::default(), current_source_range: TextRange::default(), - qualified_path: Vec::new(), done_with_future_stmts: DoneWithFuture::No, future_annotations: false, ctx: CompileContext { @@ -401,12 +399,8 @@ impl Compiler<'_> { .map(|(var, _)| var.clone()) .collect(); - // Calculate qualname based on the current qualified path - let qualname = if self.qualified_path.is_empty() { - Some(obj_name.clone()) - } else { - Some(self.qualified_path.join(".")) - }; + // Qualname will be set later by set_qualname + let qualname 
= None; // Get the private name from current scope if exists let private = self.code_stack.last().and_then(|info| info.private.clone()); @@ -467,6 +461,98 @@ impl Compiler<'_> { .to_u32() } + /// Set the qualified name for the current code object, based on CPython's compiler_set_qualname + fn set_qualname(&mut self) -> String { + let qualname = self.make_qualname(); + self.current_code_info().qualname = Some(qualname.clone()); + qualname + } + fn make_qualname(&mut self) -> String { + let stack_size = self.code_stack.len(); + assert!(stack_size >= 1); + + let current_obj_name = self.current_code_info().obj_name.clone(); + + // If we're at the module level (stack_size == 1), qualname is just the name + if stack_size <= 1 { + return current_obj_name; + } + + // Check parent scope + let mut parent_idx = stack_size - 2; + let mut parent = &self.code_stack[parent_idx]; + + // If parent is a type parameter scope, look at grandparent + if parent.obj_name.starts_with(" self.symbol_table_stack.len() { + // We might be in a situation where symbol table isn't pushed yet + // In this case, check the parent symbol table + if let Some(parent_table) = self.symbol_table_stack.last() { + if let Some(symbol) = parent_table.lookup(&current_obj_name) { + if symbol.scope == SymbolScope::GlobalExplicit { + force_global = true; + } + } + } + } else if let Some(_current_table) = self.symbol_table_stack.last() { + // Mangle the name if necessary (for private names in classes) + let mangled_name = self.mangle(&current_obj_name); + + // Look up in parent symbol table to check scope + if self.symbol_table_stack.len() >= 2 { + let parent_table = &self.symbol_table_stack[self.symbol_table_stack.len() - 2]; + if let Some(symbol) = parent_table.lookup(&mangled_name) { + if symbol.scope == SymbolScope::GlobalExplicit { + force_global = true; + } + } + } + } + + // Build the qualified name + if force_global { + // For global symbols, qualname is just the name + current_obj_name + } else { + // Check parent
scope type + let parent_obj_name = &parent.obj_name; + + // Determine if parent is a function-like scope + let is_function_parent = parent.flags.contains(bytecode::CodeFlags::IS_OPTIMIZED) + && !parent_obj_name.starts_with("<") // Not a special scope like <lambda>, <listcomp>, etc. + && parent_obj_name != "<module>"; // Not the module scope + + if is_function_parent { + // For functions, append .<locals> to parent qualname + // Use parent's qualname if available, otherwise use parent_obj_name + let parent_qualname = parent.qualname.as_ref().unwrap_or(parent_obj_name); + format!("{parent_qualname}.<locals>.{current_obj_name}") + } else { + // For classes and other scopes, use parent's qualname directly + // Use parent's qualname if available, otherwise use parent_obj_name + let parent_qualname = parent.qualname.as_ref().unwrap_or(parent_obj_name); + if parent_qualname == "<module>" { + // Module level, just use the name + current_obj_name + } else { + // Concatenate parent qualname with current name + format!("{parent_qualname}.{current_obj_name}") + } + } + } + } + fn compile_program( &mut self, body: &ModModule, @@ -1547,13 +1633,8 @@ impl Compiler<'_> { }, }; - self.push_qualified_path(name); - let qualified_name = self.qualified_path.join("."); - - // Update the qualname in the current code info - self.code_stack.last_mut().unwrap().qualname = Some(qualified_name.clone()); - - self.push_qualified_path("<locals>"); + // Set qualname using the new method + let qualname = self.set_qualname(); let (doc_str, body) = split_doc(body, &self.opts); @@ -1582,8 +1663,6 @@ impl Compiler<'_> { } let code = self.pop_code_object(); - self.qualified_path.pop(); - self.qualified_path.pop(); self.ctx = prev_ctx; // Prepare generic type parameters: @@ -1646,7 +1725,7 @@ impl Compiler<'_> { code: Box::new(code), }); self.emit_load_const(ConstantData::Str { - value: qualified_name.into(), + value: qualname.into(), }); // Turn code object into function object: @@ -1758,21 +1837,6 @@ impl Compiler<'_> { loop_data: None, }; - // Check if the
class is declared global - let symbol_table = self.symbol_table_stack.last().unwrap(); - let symbol = unwrap_internal( - self, - symbol_table - .lookup(name.as_ref()) - .ok_or_else(|| InternalError::MissingSymbol(name.to_owned())), - ); - let mut global_path_prefix = Vec::new(); - if symbol.scope == SymbolScope::GlobalExplicit { - global_path_prefix.append(&mut self.qualified_path); - } - self.push_qualified_path(name); - let qualified_name = self.qualified_path.join("."); - // If there are type params, we need to push a special symbol table just for them if let Some(type_params) = type_params { self.push_symbol_table(); @@ -1790,8 +1854,8 @@ impl Compiler<'_> { self.push_output(bytecode::CodeFlags::empty(), 0, 0, 0, name.to_owned()); - // Update the qualname in the current code info - self.code_stack.last_mut().unwrap().qualname = Some(qualified_name.clone()); + // Set qualname using the new method + let qualname = self.set_qualname(); // For class scopes, set u_private to the class name for name mangling self.code_stack.last_mut().unwrap().private = Some(name.to_owned()); @@ -1803,10 +1867,10 @@ impl Compiler<'_> { let dunder_module = self.name("__module__"); emit!(self, Instruction::StoreLocal(dunder_module)); self.emit_load_const(ConstantData::Str { - value: qualified_name.into(), + value: qualname.into(), }); - let qualname = self.name("__qualname__"); - emit!(self, Instruction::StoreLocal(qualname)); + let qualname_name = self.name("__qualname__"); + emit!(self, Instruction::StoreLocal(qualname_name)); self.load_docstring(doc_str); let doc = self.name("__doc__"); emit!(self, Instruction::StoreLocal(doc)); @@ -1848,9 +1912,6 @@ impl Compiler<'_> { self.emit_return_value(); let code = self.pop_code_object(); - - self.qualified_path.pop(); - self.qualified_path.append(global_path_prefix.as_mut()); self.ctx = prev_ctx; emit!(self, Instruction::LoadBuildClass); @@ -3606,8 +3667,8 @@ impl Compiler<'_> { let mut func_flags = self .enter_function(&name, 
parameters.as_deref().unwrap_or(&Default::default()))?; - // Lambda qualname should be - self.code_stack.last_mut().unwrap().qualname = Some(name.clone()); + // Set qualname for lambda + self.set_qualname(); self.ctx = CompileContext { loop_data: Option::None, @@ -4078,7 +4139,7 @@ impl Compiler<'_> { self.push_output(flags, 1, 1, 0, name.to_owned()); // Set qualname for comprehension - self.code_stack.last_mut().unwrap().qualname = Some(name.to_owned()); + self.set_qualname(); let arg0 = self.varname(".0")?; @@ -4336,10 +4397,6 @@ impl Compiler<'_> { .line_index(self.current_source_range.start()) } - fn push_qualified_path(&mut self, name: &str) { - self.qualified_path.push(name.to_owned()); - } - fn mark_generator(&mut self) { self.current_code_info().flags |= bytecode::CodeFlags::IS_GENERATOR } From c3967bf8495958844b91c7f45960a2c01f4f4375 Mon Sep 17 00:00:00 2001 From: Shahar Naveh <50263213+ShaharNaveh@users.noreply.github.com> Date: Fri, 11 Jul 2025 13:18:23 +0300 Subject: [PATCH 019/176] Set timeout for CI (#5947) --- .github/workflows/ci.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index fec9d4d838..c5e9344956 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -113,6 +113,7 @@ jobs: RUST_BACKTRACE: full name: Run rust tests runs-on: ${{ matrix.os }} + timeout-minutes: ${{ contains(matrix.os, 'windows') && 40 || 30 }} strategy: matrix: os: [macos-latest, ubuntu-latest, windows-latest] @@ -175,6 +176,7 @@ jobs: if: ${{ !contains(github.event.pull_request.labels.*.name, 'skip:ci') }} name: Ensure compilation on various targets runs-on: ubuntu-latest + timeout-minutes: 30 steps: - uses: actions/checkout@v4 - uses: dtolnay/rust-toolchain@stable @@ -237,6 +239,7 @@ jobs: RUST_BACKTRACE: full name: Run snippets and cpython tests runs-on: ${{ matrix.os }} + timeout-minutes: ${{ contains(matrix.os, 'windows') && 40 || 30 }} strategy: matrix: os: [macos-latest, ubuntu-latest, 
windows-latest] @@ -344,6 +347,7 @@ jobs: if: ${{ !contains(github.event.pull_request.labels.*.name, 'skip:ci') }} name: Run tests under miri runs-on: ubuntu-latest + timeout-minutes: 30 steps: - uses: actions/checkout@v4 - uses: dtolnay/rust-toolchain@master @@ -361,6 +365,7 @@ jobs: if: ${{ !contains(github.event.pull_request.labels.*.name, 'skip:ci') }} name: Check the WASM package and demo runs-on: ubuntu-latest + timeout-minutes: 30 steps: - uses: actions/checkout@v4 - uses: dtolnay/rust-toolchain@stable @@ -421,6 +426,7 @@ jobs: if: ${{ !contains(github.event.pull_request.labels.*.name, 'skip:ci') }} name: Run snippets and cpython tests on wasm-wasi runs-on: ubuntu-latest + timeout-minutes: 30 steps: - uses: actions/checkout@v4 - uses: dtolnay/rust-toolchain@stable From c4234c169207e31af61219fc8bba8271a6d8a16a Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" <69878+youknowone@users.noreply.github.com> Date: Fri, 11 Jul 2025 22:43:08 +0900 Subject: [PATCH 020/176] SymbolTable::varnames, fblock (#5948) * SymbolTable::varnames * varname_cache copies it * fasthidden & static attributes * metadata * fblock --- compiler/codegen/src/compile.rs | 277 +++++++++++++++++++++------- compiler/codegen/src/ir.rs | 95 ++++++---- compiler/codegen/src/symboltable.rs | 40 +++- 3 files changed, 310 insertions(+), 102 deletions(-) diff --git a/compiler/codegen/src/compile.rs b/compiler/codegen/src/compile.rs index 5aef2020d0..6b8007c151 100644 --- a/compiler/codegen/src/compile.rs +++ b/compiler/codegen/src/compile.rs @@ -8,12 +8,39 @@ #![deny(clippy::cast_possible_truncation)] use crate::{ - IndexSet, ToPythonName, + IndexMap, IndexSet, ToPythonName, error::{CodegenError, CodegenErrorType, PatternUnreachableReason}, ir::{self, BlockIdx}, - symboltable::{self, SymbolFlags, SymbolScope, SymbolTable}, + symboltable::{self, SymbolFlags, SymbolScope, SymbolTable, SymbolTableType}, unparse::unparse_expr, }; + +const MAXBLOCKS: usize = 20; + +#[derive(Debug, Clone, Copy)] +pub enum 
FBlockType { + WhileLoop, + ForLoop, + TryExcept, + FinallyTry, + FinallyEnd, + With, + AsyncWith, + HandlerCleanup, + PopValue, + ExceptionHandler, + ExceptionGroupHandler, + AsyncComprehensionGenerator, + StopIteration, +} + +#[derive(Debug, Clone)] +pub struct FBlockInfo { + pub fb_type: FBlockType, + pub fb_block: BlockIdx, + pub fb_exit: BlockIdx, + // fb_datum is not needed in RustPython +} use itertools::Itertools; use malachite_bigint::BigInt; use num_complex::Complex; @@ -303,21 +330,27 @@ impl<'src> Compiler<'src> { fn new(opts: CompileOpts, source_code: SourceCode<'src>, code_name: String) -> Self { let module_code = ir::CodeInfo { flags: bytecode::CodeFlags::NEW_LOCALS, - posonlyarg_count: 0, - arg_count: 0, - kwonlyarg_count: 0, source_path: source_code.path.to_owned(), - first_line_number: OneIndexed::MIN, - obj_name: code_name.clone(), - qualname: Some(code_name), private: None, blocks: vec![ir::Block::default()], current_block: ir::BlockIdx(0), - constants: IndexSet::default(), - name_cache: IndexSet::default(), - varname_cache: IndexSet::default(), - cellvar_cache: IndexSet::default(), - freevar_cache: IndexSet::default(), + metadata: ir::CodeUnitMetadata { + name: code_name.clone(), + qualname: Some(code_name), + consts: IndexSet::default(), + names: IndexSet::default(), + varnames: IndexSet::default(), + cellvars: IndexSet::default(), + freevars: IndexSet::default(), + fast_hidden: IndexMap::default(), + argcount: 0, + posonlyargcount: 0, + kwonlyargcount: 0, + firstlineno: OneIndexed::MIN, + }, + static_attributes: None, + in_inlined_comp: false, + fblock: Vec::with_capacity(MAXBLOCKS), }; Compiler { code_stack: vec![module_code], @@ -382,6 +415,9 @@ impl Compiler<'_> { let source_path = self.source_code.path.to_owned(); let first_line_number = self.get_source_line_number(); + // Get the private name from current scope if exists + let private = self.code_stack.last().and_then(|info| info.private.clone()); + let table = self.push_symbol_table(); 
let cellvar_cache = table @@ -399,30 +435,42 @@ impl Compiler<'_> { .map(|(var, _)| var.clone()) .collect(); + // Initialize varname_cache from SymbolTable::varnames + let varname_cache: IndexSet = table.varnames.iter().cloned().collect(); + // Qualname will be set later by set_qualname let qualname = None; - // Get the private name from current scope if exists - let private = self.code_stack.last().and_then(|info| info.private.clone()); + // Check if this is a class scope + let is_class_scope = table.typ == SymbolTableType::Class; let info = ir::CodeInfo { flags, - posonlyarg_count, - arg_count, - kwonlyarg_count, source_path, - first_line_number, - obj_name, - qualname, private, - blocks: vec![ir::Block::default()], current_block: ir::BlockIdx(0), - constants: IndexSet::default(), - name_cache: IndexSet::default(), - varname_cache: IndexSet::default(), - cellvar_cache, - freevar_cache, + metadata: ir::CodeUnitMetadata { + name: obj_name, + qualname, + consts: IndexSet::default(), + names: IndexSet::default(), + varnames: varname_cache, + cellvars: cellvar_cache, + freevars: freevar_cache, + fast_hidden: IndexMap::default(), + argcount: arg_count, + posonlyargcount: posonlyarg_count, + kwonlyargcount: kwonlyarg_count, + firstlineno: first_line_number, + }, + static_attributes: if is_class_scope { + Some(IndexSet::default()) + } else { + None + }, + in_inlined_comp: false, + fblock: Vec::with_capacity(MAXBLOCKS), }; self.code_stack.push(info); } @@ -435,10 +483,41 @@ impl Compiler<'_> { unwrap_internal(self, stack_top.finalize_code(self.opts.optimize)) } + /// Push a new fblock + // = compiler_push_fblock + fn push_fblock( + &mut self, + fb_type: FBlockType, + fb_block: BlockIdx, + fb_exit: BlockIdx, + ) -> CompileResult<()> { + let code = self.current_code_info(); + if code.fblock.len() >= MAXBLOCKS { + return Err(self.error(CodegenErrorType::SyntaxError( + "too many statically nested blocks".to_owned(), + ))); + } + code.fblock.push(FBlockInfo { + fb_type, + 
fb_block, + fb_exit, + }); + Ok(()) + } + + /// Pop an fblock + // = compiler_pop_fblock + fn pop_fblock(&mut self, _expected_type: FBlockType) -> FBlockInfo { + let code = self.current_code_info(); + // TODO: Add assertion to check expected type matches + // assert!(matches!(fblock.fb_type, expected_type)); + code.fblock.pop().expect("fblock stack underflow") + } + // could take impl Into>, but everything is borrowed from ast structs; we never // actually have a `String` to pass fn name(&mut self, name: &str) -> bytecode::NameIdx { - self._name_inner(name, |i| &mut i.name_cache) + self._name_inner(name, |i| &mut i.metadata.names) } fn varname(&mut self, name: &str) -> CompileResult { if Compiler::is_forbidden_arg_name(name) { @@ -446,7 +525,7 @@ impl Compiler<'_> { "cannot assign to {name}", )))); } - Ok(self._name_inner(name, |i| &mut i.varname_cache)) + Ok(self._name_inner(name, |i| &mut i.metadata.varnames)) } fn _name_inner( &mut self, @@ -464,14 +543,14 @@ impl Compiler<'_> { /// Set the qualified name for the current code object, based on CPython's compiler_set_qualname fn set_qualname(&mut self) -> String { let qualname = self.make_qualname(); - self.current_code_info().qualname = Some(qualname.clone()); + self.current_code_info().metadata.qualname = Some(qualname.clone()); qualname } fn make_qualname(&mut self) -> String { let stack_size = self.code_stack.len(); assert!(stack_size >= 1); - let current_obj_name = self.current_code_info().obj_name.clone(); + let current_obj_name = self.current_code_info().metadata.name.clone(); // If we're at the module level (stack_size == 1), qualname is just the name if stack_size <= 1 { @@ -483,7 +562,7 @@ impl Compiler<'_> { let mut parent = &self.code_stack[parent_idx]; // If parent is a type parameter scope, look at grandparent - if parent.obj_name.starts_with(" { current_obj_name } else { // Check parent scope type - let parent_obj_name = &parent.obj_name; + let parent_obj_name = &parent.metadata.name; // Determine 
if parent is a function-like scope let is_function_parent = parent.flags.contains(bytecode::CodeFlags::IS_OPTIMIZED) @@ -536,12 +615,12 @@ impl Compiler<'_> { if is_function_parent { // For functions, append . to parent qualname // Use parent's qualname if available, otherwise use parent_obj_name - let parent_qualname = parent.qualname.as_ref().unwrap_or(parent_obj_name); + let parent_qualname = parent.metadata.qualname.as_ref().unwrap_or(parent_obj_name); format!("{parent_qualname}..{current_obj_name}") } else { // For classes and other scopes, use parent's qualname directly // Use parent's qualname if available, otherwise use parent_obj_name - let parent_qualname = parent.qualname.as_ref().unwrap_or(parent_obj_name); + let parent_qualname = parent.metadata.qualname.as_ref().unwrap_or(parent_obj_name); if parent_qualname == "" { // Module level, just use the name current_obj_name @@ -706,7 +785,7 @@ impl Compiler<'_> { .ok_or_else(|| InternalError::MissingSymbol(name.to_string())), ); let info = self.code_stack.last_mut().unwrap(); - let mut cache = &mut info.name_cache; + let mut cache = &mut info.metadata.names; enum NameOpType { Fast, Global, @@ -715,7 +794,7 @@ impl Compiler<'_> { } let op_typ = match symbol.scope { SymbolScope::Local if self.ctx.in_func() => { - cache = &mut info.varname_cache; + cache = &mut info.metadata.varnames; NameOpType::Fast } SymbolScope::GlobalExplicit => NameOpType::Global, @@ -725,16 +804,16 @@ impl Compiler<'_> { SymbolScope::GlobalImplicit | SymbolScope::Unknown => NameOpType::Local, SymbolScope::Local => NameOpType::Local, SymbolScope::Free => { - cache = &mut info.freevar_cache; + cache = &mut info.metadata.freevars; NameOpType::Deref } SymbolScope::Cell => { - cache = &mut info.cellvar_cache; + cache = &mut info.metadata.cellvars; NameOpType::Deref } // TODO: is this right? 
SymbolScope::TypeParams => { // Type parameters are always cell variables - cache = &mut info.cellvar_cache; + cache = &mut info.metadata.cellvars; NameOpType::Deref } // SymbolScope::Unknown => NameOpType::Global, }; @@ -750,7 +829,7 @@ impl Compiler<'_> { .get_index_of(name.as_ref()) .unwrap_or_else(|| cache.insert_full(name.into_owned()).0); if let SymbolScope::Free = symbol.scope { - idx += info.cellvar_cache.len(); + idx += info.metadata.cellvars.len(); } let op = match op_typ { NameOpType::Fast => match usage { @@ -1067,26 +1146,62 @@ impl Compiler<'_> { self.switch_to_block(after_block); } } - Stmt::Break(_) => match self.ctx.loop_data { - Some((_, end)) => { - emit!(self, Instruction::Break { target: end }); - } - None => { - return Err( - self.error_ranged(CodegenErrorType::InvalidBreak, statement.range()) - ); - } - }, - Stmt::Continue(_) => match self.ctx.loop_data { - Some((start, _)) => { - emit!(self, Instruction::Continue { target: start }); + Stmt::Break(_) => { + // Find the innermost loop in fblock stack + let found_loop = { + let code = self.current_code_info(); + let mut result = None; + for i in (0..code.fblock.len()).rev() { + match code.fblock[i].fb_type { + FBlockType::WhileLoop | FBlockType::ForLoop => { + result = Some(code.fblock[i].fb_exit); + break; + } + _ => continue, + } + } + result + }; + + match found_loop { + Some(exit_block) => { + emit!(self, Instruction::Break { target: exit_block }); + } + None => { + return Err( + self.error_ranged(CodegenErrorType::InvalidBreak, statement.range()) + ); + } } - None => { - return Err( - self.error_ranged(CodegenErrorType::InvalidContinue, statement.range()) - ); + } + Stmt::Continue(_) => { + // Find the innermost loop in fblock stack + let found_loop = { + let code = self.current_code_info(); + let mut result = None; + for i in (0..code.fblock.len()).rev() { + match code.fblock[i].fb_type { + FBlockType::WhileLoop | FBlockType::ForLoop => { + result = Some(code.fblock[i].fb_block); + break; 
+ } + _ => continue, + } + } + result + }; + + match found_loop { + Some(loop_block) => { + emit!(self, Instruction::Continue { target: loop_block }); + } + None => { + return Err( + self.error_ranged(CodegenErrorType::InvalidContinue, statement.range()) + ); + } } - }, + } Stmt::Return(StmtReturn { value, .. }) => { if !self.ctx.in_func() { return Err( @@ -1639,7 +1754,8 @@ impl Compiler<'_> { let (doc_str, body) = split_doc(body, &self.opts); self.current_code_info() - .constants + .metadata + .consts .insert_full(ConstantData::None); // Emit RESUME instruction at function start @@ -1760,10 +1876,12 @@ impl Compiler<'_> { ); let parent_code = self.code_stack.last().unwrap(); let vars = match symbol.scope { - SymbolScope::Free => &parent_code.freevar_cache, - SymbolScope::Cell => &parent_code.cellvar_cache, - SymbolScope::TypeParams => &parent_code.cellvar_cache, - _ if symbol.flags.contains(SymbolFlags::FREE_CLASS) => &parent_code.freevar_cache, + SymbolScope::Free => &parent_code.metadata.freevars, + SymbolScope::Cell => &parent_code.metadata.cellvars, + SymbolScope::TypeParams => &parent_code.metadata.cellvars, + _ if symbol.flags.contains(SymbolFlags::FREE_CLASS) => { + &parent_code.metadata.freevars + } x => unreachable!( "var {} in a {:?} should be free or cell but it's {:?}", var, table.typ, x @@ -1771,7 +1889,7 @@ impl Compiler<'_> { }; let mut idx = vars.get_index_of(var).unwrap(); if let SymbolScope::Free = symbol.scope { - idx += parent_code.cellvar_cache.len(); + idx += parent_code.metadata.cellvars.len(); } emit!(self, Instruction::LoadClosure(idx.to_u32())) } @@ -1896,7 +2014,8 @@ impl Compiler<'_> { .code_stack .last_mut() .unwrap() - .cellvar_cache + .metadata + .cellvars .iter() .position(|var| *var == "__class__"); @@ -2005,6 +2124,9 @@ impl Compiler<'_> { emit!(self, Instruction::SetupLoop); self.switch_to_block(while_block); + // Push fblock for while loop + self.push_fblock(FBlockType::WhileLoop, while_block, after_block)?; + 
self.compile_jump_if(test, false, else_block)?; let was_in_loop = self.ctx.loop_data.replace((while_block, after_block)); @@ -2017,6 +2139,9 @@ impl Compiler<'_> { } ); self.switch_to_block(else_block); + + // Pop fblock + self.pop_fblock(FBlockType::WhileLoop); emit!(self, Instruction::PopBlock); self.compile_statements(orelse)?; self.switch_to_block(after_block); @@ -2127,6 +2252,10 @@ impl Compiler<'_> { emit!(self, Instruction::GetAIter); self.switch_to_block(for_block); + + // Push fblock for async for loop + self.push_fblock(FBlockType::ForLoop, for_block, after_block)?; + emit!( self, Instruction::SetupExcept { @@ -2149,6 +2278,10 @@ impl Compiler<'_> { emit!(self, Instruction::GetIter); self.switch_to_block(for_block); + + // Push fblock for for loop + self.push_fblock(FBlockType::ForLoop, for_block, after_block)?; + emit!(self, Instruction::ForIter { target: else_block }); // Start of loop iteration, set targets: @@ -2161,6 +2294,10 @@ impl Compiler<'_> { emit!(self, Instruction::Jump { target: for_block }); self.switch_to_block(else_block); + + // Pop fblock + self.pop_fblock(FBlockType::ForLoop); + if is_async { emit!(self, Instruction::EndAsyncFor); } @@ -3677,7 +3814,8 @@ impl Compiler<'_> { }; self.current_code_info() - .constants + .metadata + .consts .insert_full(ConstantData::None); self.compile_expression(body)?; @@ -4138,6 +4276,9 @@ impl Compiler<'_> { // Create magnificent function : self.push_output(flags, 1, 1, 0, name.to_owned()); + // Mark that we're in an inlined comprehension + self.current_code_info().in_inlined_comp = true; + // Set qualname for comprehension self.set_qualname(); @@ -4330,7 +4471,7 @@ impl Compiler<'_> { fn arg_constant(&mut self, constant: ConstantData) -> u32 { let info = self.current_code_info(); - info.constants.insert_full(constant).0.to_u32() + info.metadata.consts.insert_full(constant).0.to_u32() } fn emit_load_const(&mut self, constant: ConstantData) { diff --git a/compiler/codegen/src/ir.rs 
b/compiler/codegen/src/ir.rs index 852051777e..f2299892b3 100644 --- a/compiler/codegen/src/ir.rs +++ b/compiler/codegen/src/ir.rs @@ -1,11 +1,29 @@ use std::ops; -use crate::IndexSet; use crate::error::InternalError; +use crate::{IndexMap, IndexSet}; use ruff_source_file::{OneIndexed, SourceLocation}; use rustpython_compiler_core::bytecode::{ CodeFlags, CodeObject, CodeUnit, ConstantData, InstrDisplayContext, Instruction, Label, OpArg, }; + +/// Metadata for a code unit +// = _PyCompile_CodeUnitMetadata +#[derive(Clone, Debug)] +pub struct CodeUnitMetadata { + pub name: String, // u_name (obj_name) + pub qualname: Option, // u_qualname + pub consts: IndexSet, // u_consts + pub names: IndexSet, // u_names + pub varnames: IndexSet, // u_varnames + pub cellvars: IndexSet, // u_cellvars + pub freevars: IndexSet, // u_freevars + pub fast_hidden: IndexMap, // u_fast_hidden + pub argcount: u32, // u_argcount + pub posonlyargcount: u32, // u_posonlyargcount + pub kwonlyargcount: u32, // u_kwonlyargcount + pub firstlineno: OneIndexed, // u_firstlineno +} // use rustpython_parser_core::source_code::{LineNumber, SourceLocation}; #[derive(Copy, Clone, PartialEq, Eq, Debug)] @@ -67,22 +85,22 @@ impl Default for Block { pub struct CodeInfo { pub flags: CodeFlags, - pub posonlyarg_count: u32, // Number of positional-only arguments - pub arg_count: u32, - pub kwonlyarg_count: u32, pub source_path: String, - pub first_line_number: OneIndexed, - pub obj_name: String, // Name of the object that created this code object - pub qualname: Option, // Qualified name of the object pub private: Option, // For private name mangling, mostly for class pub blocks: Vec, pub current_block: BlockIdx, - pub constants: IndexSet, - pub name_cache: IndexSet, - pub varname_cache: IndexSet, - pub cellvar_cache: IndexSet, - pub freevar_cache: IndexSet, + + pub metadata: CodeUnitMetadata, + + // For class scopes: attributes accessed via self.X + pub static_attributes: Option>, + + // True if compiling an 
inlined comprehension + pub in_inlined_comp: bool, + + // Block stack for tracking nested control structures + pub fblock: Vec, } impl CodeInfo { pub fn finalize_code(mut self, optimize: u8) -> crate::InternalResult { @@ -95,24 +113,32 @@ impl CodeInfo { let Self { flags, - posonlyarg_count, - arg_count, - kwonlyarg_count, source_path, - first_line_number, - obj_name, - qualname, private: _, // private is only used during compilation mut blocks, current_block: _, - constants, - name_cache, - varname_cache, - cellvar_cache, - freevar_cache, + metadata, + static_attributes: _, + in_inlined_comp: _, + fblock: _, } = self; + let CodeUnitMetadata { + name: obj_name, + qualname, + consts: constants, + names: name_cache, + varnames: varname_cache, + cellvars: cellvar_cache, + freevars: freevar_cache, + fast_hidden: _, + argcount: arg_count, + posonlyargcount: posonlyarg_count, + kwonlyargcount: kwonlyarg_count, + firstlineno: first_line_number, + } = metadata; + let mut instructions = Vec::new(); let mut locations = Vec::new(); @@ -182,21 +208,23 @@ impl CodeInfo { } fn cell2arg(&self) -> Option> { - if self.cellvar_cache.is_empty() { + if self.metadata.cellvars.is_empty() { return None; } - let total_args = self.arg_count - + self.kwonlyarg_count + let total_args = self.metadata.argcount + + self.metadata.kwonlyargcount + self.flags.contains(CodeFlags::HAS_VARARGS) as u32 + self.flags.contains(CodeFlags::HAS_VARKEYWORDS) as u32; let mut found_cellarg = false; let cell2arg = self - .cellvar_cache + .metadata + .cellvars .iter() .map(|var| { - self.varname_cache + self.metadata + .varnames .get_index_of(var) // check that it's actually an arg .filter(|i| *i < total_args as usize) @@ -302,18 +330,19 @@ impl CodeInfo { impl InstrDisplayContext for CodeInfo { type Constant = ConstantData; fn get_constant(&self, i: usize) -> &ConstantData { - &self.constants[i] + &self.metadata.consts[i] } fn get_name(&self, i: usize) -> &str { - self.name_cache[i].as_ref() + 
self.metadata.names[i].as_ref() } fn get_varname(&self, i: usize) -> &str { - self.varname_cache[i].as_ref() + self.metadata.varnames[i].as_ref() } fn get_cell_name(&self, i: usize) -> &str { - self.cellvar_cache + self.metadata + .cellvars .get_index(i) - .unwrap_or_else(|| &self.freevar_cache[i - self.cellvar_cache.len()]) + .unwrap_or_else(|| &self.metadata.freevars[i - self.metadata.cellvars.len()]) .as_ref() } } diff --git a/compiler/codegen/src/symboltable.rs b/compiler/codegen/src/symboltable.rs index 66dbff326a..52b6bae644 100644 --- a/compiler/codegen/src/symboltable.rs +++ b/compiler/codegen/src/symboltable.rs @@ -45,6 +45,9 @@ pub struct SymbolTable { /// A list of sub-scopes in the order as found in the /// AST nodes. pub sub_tables: Vec, + + /// Variable names in definition order (parameters first, then locals) + pub varnames: Vec, } impl SymbolTable { @@ -56,6 +59,7 @@ impl SymbolTable { is_nested, symbols: IndexMap::default(), sub_tables: vec![], + varnames: Vec::new(), } } @@ -573,6 +577,8 @@ struct SymbolTableBuilder<'src> { tables: Vec, future_annotations: bool, source_code: SourceCode<'src>, + // Current scope's varnames being collected (temporary storage) + current_varnames: Vec, } /// Enum to indicate in what mode an expression @@ -595,6 +601,7 @@ impl<'src> SymbolTableBuilder<'src> { tables: vec![], future_annotations: false, source_code, + current_varnames: Vec::new(), }; this.enter_scope("top", SymbolTableType::Module, 0); this @@ -605,6 +612,8 @@ impl SymbolTableBuilder<'_> { fn finish(mut self) -> Result { assert_eq!(self.tables.len(), 1); let mut symbol_table = self.tables.pop().unwrap(); + // Save varnames for the top-level module scope + symbol_table.varnames = self.current_varnames; analyze_symbol_table(&mut symbol_table)?; Ok(symbol_table) } @@ -617,11 +626,15 @@ impl SymbolTableBuilder<'_> { .unwrap_or(false); let table = SymbolTable::new(name.to_owned(), typ, line_number, is_nested); self.tables.push(table); + // Clear 
current_varnames for the new scope + self.current_varnames.clear(); } /// Pop symbol table and add to sub table of parent table. fn leave_scope(&mut self) { - let table = self.tables.pop().unwrap(); + let mut table = self.tables.pop().unwrap(); + // Save the collected varnames to the symbol table + table.varnames = std::mem::take(&mut self.current_varnames); self.tables.last_mut().unwrap().sub_tables.push(table); } @@ -1533,18 +1546,43 @@ impl SymbolTableBuilder<'_> { } SymbolUsage::Parameter => { flags.insert(SymbolFlags::PARAMETER); + // Parameters are always added to varnames first + let name_str = symbol.name.clone(); + if !self.current_varnames.contains(&name_str) { + self.current_varnames.push(name_str); + } } SymbolUsage::AnnotationParameter => { flags.insert(SymbolFlags::PARAMETER | SymbolFlags::ANNOTATED); + // Annotated parameters are also added to varnames + let name_str = symbol.name.clone(); + if !self.current_varnames.contains(&name_str) { + self.current_varnames.push(name_str); + } } SymbolUsage::AnnotationAssigned => { flags.insert(SymbolFlags::ASSIGNED | SymbolFlags::ANNOTATED); } SymbolUsage::Assigned => { flags.insert(SymbolFlags::ASSIGNED); + // Local variables (assigned) are added to varnames if they are local scope + // and not already in varnames + if symbol.scope == SymbolScope::Local { + let name_str = symbol.name.clone(); + if !self.current_varnames.contains(&name_str) { + self.current_varnames.push(name_str); + } + } } SymbolUsage::AssignedNamedExprInComprehension => { flags.insert(SymbolFlags::ASSIGNED | SymbolFlags::ASSIGNED_IN_COMPREHENSION); + // Named expressions in comprehensions might also be locals + if symbol.scope == SymbolScope::Local { + let name_str = symbol.name.clone(); + if !self.current_varnames.contains(&name_str) { + self.current_varnames.push(name_str); + } + } } SymbolUsage::Global => { symbol.scope = SymbolScope::GlobalExplicit; From f19478edecba5355e827a7bf1fc40da6b85180f8 Mon Sep 17 00:00:00 2001 From: Shahar Naveh 
<50263213+ShaharNaveh@users.noreply.github.com> Date: Fri, 11 Jul 2025 19:10:30 +0300 Subject: [PATCH 021/176] Update operator from 3.13.5 (#5935) --- Lib/operator.py | 10 ++--- Lib/test/test_operator.py | 88 +++++++++++++++++++++++++++++++++++++++ vm/src/stdlib/operator.rs | 9 ++-- 3 files changed, 98 insertions(+), 9 deletions(-) diff --git a/Lib/operator.py b/Lib/operator.py index 30116c1189..02ccdaa13d 100644 --- a/Lib/operator.py +++ b/Lib/operator.py @@ -239,7 +239,7 @@ class attrgetter: """ __slots__ = ('_attrs', '_call') - def __init__(self, attr, *attrs): + def __init__(self, attr, /, *attrs): if not attrs: if not isinstance(attr, str): raise TypeError('attribute name must be a string') @@ -257,7 +257,7 @@ def func(obj): return tuple(getter(obj) for getter in getters) self._call = func - def __call__(self, obj): + def __call__(self, obj, /): return self._call(obj) def __repr__(self): @@ -276,7 +276,7 @@ class itemgetter: """ __slots__ = ('_items', '_call') - def __init__(self, item, *items): + def __init__(self, item, /, *items): if not items: self._items = (item,) def func(obj): @@ -288,7 +288,7 @@ def func(obj): return tuple(obj[i] for i in items) self._call = func - def __call__(self, obj): + def __call__(self, obj, /): return self._call(obj) def __repr__(self): @@ -315,7 +315,7 @@ def __init__(self, name, /, *args, **kwargs): self._args = args self._kwargs = kwargs - def __call__(self, obj): + def __call__(self, obj, /): return getattr(obj, self._name)(*self._args, **self._kwargs) def __repr__(self): diff --git a/Lib/test/test_operator.py b/Lib/test/test_operator.py index 1db738d228..05b7a7462d 100644 --- a/Lib/test/test_operator.py +++ b/Lib/test/test_operator.py @@ -1,6 +1,9 @@ import unittest +import inspect import pickle import sys +from decimal import Decimal +from fractions import Fraction from test import support from test.support import import_helper @@ -508,6 +511,44 @@ def __getitem__(self, other): return 5 # so that C is a sequence 
self.assertEqual(operator.ixor (c, 5), "ixor") self.assertEqual(operator.iconcat (c, c), "iadd") + def test_iconcat_without_getitem(self): + operator = self.module + + msg = "'int' object can't be concatenated" + with self.assertRaisesRegex(TypeError, msg): + operator.iconcat(1, 0.5) + + def test_index(self): + operator = self.module + class X: + def __index__(self): + return 1 + + self.assertEqual(operator.index(X()), 1) + self.assertEqual(operator.index(0), 0) + self.assertEqual(operator.index(1), 1) + self.assertEqual(operator.index(2), 2) + with self.assertRaises((AttributeError, TypeError)): + operator.index(1.5) + with self.assertRaises((AttributeError, TypeError)): + operator.index(Fraction(3, 7)) + with self.assertRaises((AttributeError, TypeError)): + operator.index(Decimal(1)) + with self.assertRaises((AttributeError, TypeError)): + operator.index(None) + + def test_not_(self): + operator = self.module + class C: + def __bool__(self): + raise SyntaxError + self.assertRaises(TypeError, operator.not_) + self.assertRaises(SyntaxError, operator.not_, C()) + self.assertFalse(operator.not_(5)) + self.assertFalse(operator.not_([0])) + self.assertTrue(operator.not_(0)) + self.assertTrue(operator.not_([])) + def test_length_hint(self): operator = self.module class X(object): @@ -533,6 +574,13 @@ def __length_hint__(self): with self.assertRaises(LookupError): operator.length_hint(X(LookupError)) + class Y: pass + + msg = "'str' object cannot be interpreted as an integer" + with self.assertRaisesRegex(TypeError, msg): + operator.length_hint(X(2), "abc") + self.assertEqual(operator.length_hint(Y(), 10), 10) + def test_call(self): operator = self.module @@ -555,6 +603,31 @@ def test_dunder_is_original(self): if dunder: self.assertIs(dunder, orig) + @support.requires_docstrings + def test_attrgetter_signature(self): + operator = self.module + sig = inspect.signature(operator.attrgetter) + self.assertEqual(str(sig), '(attr, /, *attrs)') + sig = 
inspect.signature(operator.attrgetter('x', 'z', 'y')) + self.assertEqual(str(sig), '(obj, /)') + + @support.requires_docstrings + def test_itemgetter_signature(self): + operator = self.module + sig = inspect.signature(operator.itemgetter) + self.assertEqual(str(sig), '(item, /, *items)') + sig = inspect.signature(operator.itemgetter(2, 3, 5)) + self.assertEqual(str(sig), '(obj, /)') + + @support.requires_docstrings + def test_methodcaller_signature(self): + operator = self.module + sig = inspect.signature(operator.methodcaller) + self.assertEqual(str(sig), '(name, /, *args, **kwargs)') + sig = inspect.signature(operator.methodcaller('foo', 2, y=3)) + self.assertEqual(str(sig), '(obj, /)') + + class PyOperatorTestCase(OperatorTestCase, unittest.TestCase): module = py_operator @@ -562,6 +635,21 @@ class PyOperatorTestCase(OperatorTestCase, unittest.TestCase): class COperatorTestCase(OperatorTestCase, unittest.TestCase): module = c_operator + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_attrgetter_signature(self): + super().test_attrgetter_signature() + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_itemgetter_signature(self): + super().test_itemgetter_signature() + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_methodcaller_signature(self): + super().test_methodcaller_signature() + class OperatorPickleTestCase: def copy(self, obj, proto): diff --git a/vm/src/stdlib/operator.rs b/vm/src/stdlib/operator.rs index c33b9a47b0..0d5a309201 100644 --- a/vm/src/stdlib/operator.rs +++ b/vm/src/stdlib/operator.rs @@ -225,7 +225,7 @@ mod _operator { .map(|v| { if !v.fast_isinstance(vm.ctx.types.int_type) { return Err(vm.new_type_error(format!( - "'{}' type cannot be interpreted as an integer", + "'{}' object cannot be interpreted as an integer", v.class().name() ))); } @@ -253,9 +253,10 @@ mod _operator { if !a.class().has_attr(identifier!(vm, __getitem__)) || a.fast_isinstance(vm.ctx.types.dict_type) { - return Err( - 
vm.new_type_error(format!("{} object can't be concatenated", a.class().name())) - ); + return Err(vm.new_type_error(format!( + "'{}' object can't be concatenated", + a.class().name() + ))); } vm._iadd(&a, &b) } From 3f9a5fddbbe2c29b843e57557812cefd5272180f Mon Sep 17 00:00:00 2001 From: Shahar Naveh <50263213+ShaharNaveh@users.noreply.github.com> Date: Sat, 12 Jul 2025 13:18:08 +0300 Subject: [PATCH 022/176] Don't skip non hanging test (#5951) --- Lib/test/test_contains.py | 1 - 1 file changed, 1 deletion(-) diff --git a/Lib/test/test_contains.py b/Lib/test/test_contains.py index c533311572..471d04a76c 100644 --- a/Lib/test/test_contains.py +++ b/Lib/test/test_contains.py @@ -36,7 +36,6 @@ def test_common_tests(self): self.assertRaises(TypeError, lambda: None in 'abc') - @unittest.skip("TODO: RUSTPYTHON, hangs") def test_builtin_sequence_types(self): # a collection of tests on builtin sequence types a = range(10) From 1303ace453192996a6cfe649ee34c63cd6129474 Mon Sep 17 00:00:00 2001 From: Shahar Naveh <50263213+ShaharNaveh@users.noreply.github.com> Date: Sat, 12 Jul 2025 13:18:31 +0300 Subject: [PATCH 023/176] Update textwrap from 3.13.5 (#5952) --- Lib/textwrap.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/Lib/textwrap.py b/Lib/textwrap.py index 841de9baec..7ca393d1c3 100644 --- a/Lib/textwrap.py +++ b/Lib/textwrap.py @@ -63,10 +63,7 @@ class TextWrapper: Append to the last line of truncated text. """ - unicode_whitespace_trans = {} - uspace = ord(' ') - for x in _whitespace: - unicode_whitespace_trans[ord(x)] = uspace + unicode_whitespace_trans = dict.fromkeys(map(ord, _whitespace), ord(' ')) # This funky little regex is just the trick for splitting # text up into word-wrappable chunks. E.g. @@ -479,13 +476,19 @@ def indent(text, prefix, predicate=None): consist solely of whitespace characters. 
""" if predicate is None: - def predicate(line): - return line.strip() - - def prefixed_lines(): - for line in text.splitlines(True): - yield (prefix + line if predicate(line) else line) - return ''.join(prefixed_lines()) + # str.splitlines(True) doesn't produce empty string. + # ''.splitlines(True) => [] + # 'foo\n'.splitlines(True) => ['foo\n'] + # So we can use just `not s.isspace()` here. + predicate = lambda s: not s.isspace() + + prefixed_lines = [] + for line in text.splitlines(True): + if predicate(line): + prefixed_lines.append(prefix) + prefixed_lines.append(line) + + return ''.join(prefixed_lines) if __name__ == "__main__": From 3ef0cfc50c3b6fa5a1508f69a3a5408984d46db9 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" <69878+youknowone@users.noreply.github.com> Date: Sat, 12 Jul 2025 19:28:22 +0900 Subject: [PATCH 024/176] compiler enter_scope (#5950) * enter_scope * drop_class_free * push_output based on enter_scope --- compiler/codegen/src/compile.rs | 231 ++++++++++++++++++++++------ compiler/codegen/src/symboltable.rs | 38 +++++ 2 files changed, 219 insertions(+), 50 deletions(-) diff --git a/compiler/codegen/src/compile.rs b/compiler/codegen/src/compile.rs index 6b8007c151..14495499c5 100644 --- a/compiler/codegen/src/compile.rs +++ b/compiler/codegen/src/compile.rs @@ -176,7 +176,7 @@ pub fn compile_program( .map_err(|e| e.into_codegen_error(source_code.path.to_owned()))?; let mut compiler = Compiler::new(opts, source_code, "".to_owned()); compiler.compile_program(ast, symbol_table)?; - let code = compiler.pop_code_object(); + let code = compiler.exit_scope(); trace!("Compilation completed: {code:?}"); Ok(code) } @@ -191,7 +191,7 @@ pub fn compile_program_single( .map_err(|e| e.into_codegen_error(source_code.path.to_owned()))?; let mut compiler = Compiler::new(opts, source_code, "".to_owned()); compiler.compile_program_single(&ast.body, symbol_table)?; - let code = compiler.pop_code_object(); + let code = compiler.exit_scope(); trace!("Compilation 
completed: {code:?}"); Ok(code) } @@ -205,7 +205,7 @@ pub fn compile_block_expression( .map_err(|e| e.into_codegen_error(source_code.path.to_owned()))?; let mut compiler = Compiler::new(opts, source_code, "".to_owned()); compiler.compile_block_expr(&ast.body, symbol_table)?; - let code = compiler.pop_code_object(); + let code = compiler.exit_scope(); trace!("Compilation completed: {code:?}"); Ok(code) } @@ -219,7 +219,7 @@ pub fn compile_expression( .map_err(|e| e.into_codegen_error(source_code.path.to_owned()))?; let mut compiler = Compiler::new(opts, source_code, "".to_owned()); compiler.compile_eval(ast, symbol_table)?; - let code = compiler.pop_code_object(); + let code = compiler.exit_scope(); Ok(code) } @@ -404,55 +404,121 @@ impl Compiler<'_> { self.symbol_table_stack.pop().expect("compiler bug") } - fn push_output( + /// Enter a new scope + // = compiler_enter_scope + fn enter_scope( &mut self, - flags: bytecode::CodeFlags, - posonlyarg_count: u32, - arg_count: u32, - kwonlyarg_count: u32, - obj_name: String, - ) { + name: &str, + scope_type: SymbolTableType, + key: usize, // In RustPython, we use the index in symbol_table_stack as key + lineno: u32, + ) -> CompileResult<()> { + // Create location + let location = ruff_source_file::SourceLocation { + row: OneIndexed::new(lineno as usize).unwrap_or(OneIndexed::MIN), + column: OneIndexed::new(1).unwrap(), + }; + + // Allocate a new compiler unit + + // In Rust, we'll create the structure directly let source_path = self.source_code.path.to_owned(); - let first_line_number = self.get_source_line_number(); - // Get the private name from current scope if exists - let private = self.code_stack.last().and_then(|info| info.private.clone()); + // Lookup symbol table entry using key (_PySymtable_Lookup) + let ste = if key < self.symbol_table_stack.len() { + &self.symbol_table_stack[key] + } else { + return Err(self.error(CodegenErrorType::SyntaxError( + "unknown symbol table entry".to_owned(), + ))); + }; - let table 
= self.push_symbol_table(); + // Use varnames from symbol table (already collected in definition order) + let varname_cache: IndexSet = ste.varnames.iter().cloned().collect(); - let cellvar_cache = table + // Build cellvars using dictbytype (CELL scope, sorted) + let mut cellvar_cache = IndexSet::default(); + let mut cell_names: Vec<_> = ste .symbols .iter() .filter(|(_, s)| s.scope == SymbolScope::Cell) - .map(|(var, _)| var.clone()) + .map(|(name, _)| name.clone()) .collect(); - let freevar_cache = table + cell_names.sort(); + for name in cell_names { + cellvar_cache.insert(name); + } + + // Handle implicit __class__ cell if needed + if ste.needs_class_closure { + // Cook up an implicit __class__ cell + debug_assert_eq!(scope_type, SymbolTableType::Class); + cellvar_cache.insert("__class__".to_string()); + } + + // Handle implicit __classdict__ cell if needed + if ste.needs_classdict { + // Cook up an implicit __classdict__ cell + debug_assert_eq!(scope_type, SymbolTableType::Class); + cellvar_cache.insert("__classdict__".to_string()); + } + + // Build freevars using dictbytype (FREE scope, offset by cellvars size) + let mut freevar_cache = IndexSet::default(); + let mut free_names: Vec<_> = ste .symbols .iter() .filter(|(_, s)| { s.scope == SymbolScope::Free || s.flags.contains(SymbolFlags::FREE_CLASS) }) - .map(|(var, _)| var.clone()) + .map(|(name, _)| name.clone()) .collect(); + free_names.sort(); + for name in free_names { + freevar_cache.insert(name); + } + + // Initialize u_metadata fields + let (flags, posonlyarg_count, arg_count, kwonlyarg_count) = match scope_type { + SymbolTableType::Module => (bytecode::CodeFlags::empty(), 0, 0, 0), + SymbolTableType::Class => (bytecode::CodeFlags::empty(), 0, 0, 0), + SymbolTableType::Function | SymbolTableType::Lambda => ( + bytecode::CodeFlags::NEW_LOCALS | bytecode::CodeFlags::IS_OPTIMIZED, + 0, // Will be set later in enter_function + 0, // Will be set later in enter_function + 0, // Will be set later in 
enter_function + ), + SymbolTableType::Comprehension => ( + bytecode::CodeFlags::NEW_LOCALS | bytecode::CodeFlags::IS_OPTIMIZED, + 0, + 1, // comprehensions take one argument (.0) + 0, + ), + SymbolTableType::TypeParams => ( + bytecode::CodeFlags::NEW_LOCALS | bytecode::CodeFlags::IS_OPTIMIZED, + 0, + 0, + 0, + ), + }; - // Initialize varname_cache from SymbolTable::varnames - let varname_cache: IndexSet = table.varnames.iter().cloned().collect(); - - // Qualname will be set later by set_qualname - let qualname = None; - - // Check if this is a class scope - let is_class_scope = table.typ == SymbolTableType::Class; + // Get private name from parent scope + let private = if !self.code_stack.is_empty() { + self.code_stack.last().unwrap().private.clone() + } else { + None + }; - let info = ir::CodeInfo { + // Create the new compilation unit + let code_info = ir::CodeInfo { flags, - source_path, + source_path: source_path.clone(), private, blocks: vec![ir::Block::default()], - current_block: ir::BlockIdx(0), + current_block: BlockIdx(0), metadata: ir::CodeUnitMetadata { - name: obj_name, - qualname, + name: name.to_owned(), + qualname: None, // Will be set below consts: IndexSet::default(), names: IndexSet::default(), varnames: varname_cache, @@ -462,9 +528,9 @@ impl Compiler<'_> { argcount: arg_count, posonlyargcount: posonlyarg_count, kwonlyargcount: kwonlyarg_count, - firstlineno: first_line_number, + firstlineno: OneIndexed::new(lineno as usize).unwrap_or(OneIndexed::MIN), }, - static_attributes: if is_class_scope { + static_attributes: if scope_type == SymbolTableType::Class { Some(IndexSet::default()) } else { None @@ -472,10 +538,83 @@ impl Compiler<'_> { in_inlined_comp: false, fblock: Vec::with_capacity(MAXBLOCKS), }; - self.code_stack.push(info); + + // Push the old compiler unit on the stack (like PyCapsule) + // This happens before setting qualname + self.code_stack.push(code_info); + + // Set qualname after pushing (uses compiler_set_qualname logic) + if 
scope_type != SymbolTableType::Module { + self.set_qualname(); + } + + // Emit RESUME instruction + let _resume_loc = if scope_type == SymbolTableType::Module { + // Module scope starts with lineno 0 + ruff_source_file::SourceLocation { + row: OneIndexed::MIN, + column: OneIndexed::MIN, + } + } else { + location + }; + + // Set the source range for the RESUME instruction + // For now, just use an empty range at the beginning + self.current_source_range = TextRange::default(); + emit!( + self, + Instruction::Resume { + arg: bytecode::ResumeType::AtFuncStart as u32 + } + ); + + if scope_type == SymbolTableType::Module { + // This would be loc.lineno = -1 in CPython + // We handle this differently in RustPython + } + + Ok(()) + } + + fn push_output( + &mut self, + flags: bytecode::CodeFlags, + posonlyarg_count: u32, + arg_count: u32, + kwonlyarg_count: u32, + obj_name: String, + ) { + // First push the symbol table + let table = self.push_symbol_table(); + let scope_type = table.typ; + + // The key is the current position in the symbol table stack + let key = self.symbol_table_stack.len() - 1; + + // Get the line number + let lineno = self.get_source_line_number().get(); + + // Call enter_scope which does most of the work + if let Err(e) = self.enter_scope(&obj_name, scope_type, key, lineno.to_u32()) { + // In the current implementation, push_output doesn't return an error, + // so we panic here. This maintains the same behavior. 
+ panic!("enter_scope failed: {e:?}"); + } + + // Override the values that push_output sets explicitly + // enter_scope sets default values based on scope_type, but push_output + // allows callers to specify exact values + if let Some(info) = self.code_stack.last_mut() { + info.flags = flags; + info.metadata.argcount = arg_count; + info.metadata.posonlyargcount = posonlyarg_count; + info.metadata.kwonlyargcount = kwonlyarg_count; + } } - fn pop_code_object(&mut self) -> CodeObject { + // compiler_exit_scope + fn exit_scope(&mut self) -> CodeObject { let table = self.pop_symbol_table(); assert!(table.sub_tables.is_empty()); let pop = self.code_stack.pop(); @@ -755,7 +894,7 @@ impl Compiler<'_> { } fn mangle<'a>(&self, name: &'a str) -> Cow<'a, str> { - // Use u_private from current code unit for name mangling + // Use private from current code unit for name mangling let private = self .code_stack .last() @@ -1758,14 +1897,6 @@ impl Compiler<'_> { .consts .insert_full(ConstantData::None); - // Emit RESUME instruction at function start - emit!( - self, - Instruction::Resume { - arg: bytecode::ResumeType::AtFuncStart as u32 - } - ); - self.compile_statements(body)?; // Emit None at end: @@ -1778,7 +1909,7 @@ impl Compiler<'_> { } } - let code = self.pop_code_object(); + let code = self.exit_scope(); self.ctx = prev_ctx; // Prepare generic type parameters: @@ -2030,7 +2161,7 @@ impl Compiler<'_> { self.emit_return_value(); - let code = self.pop_code_object(); + let code = self.exit_scope(); self.ctx = prev_ctx; emit!(self, Instruction::LoadBuildClass); @@ -3820,7 +3951,7 @@ impl Compiler<'_> { self.compile_expression(body)?; self.emit_return_value(); - let code = self.pop_code_object(); + let code = self.exit_scope(); if self.build_closure(&code) { func_flags |= bytecode::MakeFunctionFlags::CLOSURE; } @@ -4369,7 +4500,7 @@ impl Compiler<'_> { self.emit_return_value(); // Fetch code for listcomp function: - let code = self.pop_code_object(); + let code = 
self.exit_scope(); self.ctx = prev_ctx; @@ -5076,7 +5207,7 @@ mod tests { .unwrap(); let mut compiler = Compiler::new(opts, source_code, "".to_owned()); compiler.compile_program(&ast, symbol_table).unwrap(); - compiler.pop_code_object() + compiler.exit_scope() } macro_rules! assert_dis_snapshot { diff --git a/compiler/codegen/src/symboltable.rs b/compiler/codegen/src/symboltable.rs index 52b6bae644..16a65bca11 100644 --- a/compiler/codegen/src/symboltable.rs +++ b/compiler/codegen/src/symboltable.rs @@ -48,6 +48,12 @@ pub struct SymbolTable { /// Variable names in definition order (parameters first, then locals) pub varnames: Vec, + + /// Whether this class scope needs an implicit __class__ cell + pub needs_class_closure: bool, + + /// Whether this class scope needs an implicit __classdict__ cell + pub needs_classdict: bool, } impl SymbolTable { @@ -60,6 +66,8 @@ impl SymbolTable { symbols: IndexMap::default(), sub_tables: vec![], varnames: Vec::new(), + needs_class_closure: false, + needs_classdict: false, } } @@ -228,6 +236,30 @@ fn analyze_symbol_table(symbol_table: &mut SymbolTable) -> SymbolTableResult { analyzer.analyze_symbol_table(symbol_table) } +/* Drop __class__ and __classdict__ from free variables in class scope + and set the appropriate flags. Equivalent to CPython's drop_class_free(). 
+ See: https://github.com/python/cpython/blob/main/Python/symtable.c#L884 +*/ +fn drop_class_free(symbol_table: &mut SymbolTable) { + // Check if __class__ is used as a free variable + if let Some(class_symbol) = symbol_table.symbols.get("__class__") { + if class_symbol.scope == SymbolScope::Free { + symbol_table.needs_class_closure = true; + // Note: In CPython, the symbol is removed from the free set, + // but in RustPython we handle this differently during code generation + } + } + + // Check if __classdict__ is used as a free variable + if let Some(classdict_symbol) = symbol_table.symbols.get("__classdict__") { + if classdict_symbol.scope == SymbolScope::Free { + symbol_table.needs_classdict = true; + // Note: In CPython, the symbol is removed from the free set, + // but in RustPython we handle this differently during code generation + } + } +} + type SymbolMap = IndexMap; mod stack { @@ -314,6 +346,12 @@ impl SymbolTableAnalyzer { for symbol in symbol_table.symbols.values_mut() { self.analyze_symbol(symbol, symbol_table.typ, sub_tables)?; } + + // Handle class-specific implicit cells (like CPython) + if symbol_table.typ == SymbolTableType::Class { + drop_class_free(symbol_table); + } + Ok(()) } From fef660e6b3d035e8463b6d66de9bb37ddd184184 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" <69878+youknowone@users.noreply.github.com> Date: Sat, 12 Jul 2025 20:42:57 +0900 Subject: [PATCH 025/176] more PEP695 (#5917) * compile_class_body * type.__orig_bases__ regression of test_all_exported_names * rework type_params scope * refactor compile_class_def --- Lib/test/test_descr.py | 2 - Lib/test/test_typing.py | 4 - compiler/codegen/src/compile.rs | 248 ++++++++++++++++++++++---------- vm/src/builtins/genericalias.rs | 10 +- 4 files changed, 177 insertions(+), 87 deletions(-) diff --git a/Lib/test/test_descr.py b/Lib/test/test_descr.py index d1c83cc337..b0414d5b00 100644 --- a/Lib/test/test_descr.py +++ b/Lib/test/test_descr.py @@ -5124,8 +5124,6 @@ def 
test_iter_keys(self): self.assertEqual(keys, ['__dict__', '__doc__', '__module__', '__weakref__', 'meth']) - # TODO: RUSTPYTHON - @unittest.expectedFailure @unittest.skipIf(hasattr(sys, 'gettrace') and sys.gettrace(), 'trace function introduces __local__') def test_iter_values(self): diff --git a/Lib/test/test_typing.py b/Lib/test/test_typing.py index d0fe1b0188..96fddfce03 100644 --- a/Lib/test/test_typing.py +++ b/Lib/test/test_typing.py @@ -6934,8 +6934,6 @@ class Y(Generic[T], NamedTuple): with self.assertRaises(TypeError): G[int, str] - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_generic_pep695(self): class X[T](NamedTuple): x: T @@ -7560,8 +7558,6 @@ class FooBarGeneric(BarGeneric[int]): {'a': typing.Optional[T], 'b': int, 'c': str} ) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_pep695_generic_typeddict(self): class A[T](TypedDict): a: T diff --git a/compiler/codegen/src/compile.rs b/compiler/codegen/src/compile.rs index 14495499c5..0bfe4adb41 100644 --- a/compiler/codegen/src/compile.rs +++ b/compiler/codegen/src/compile.rs @@ -2069,39 +2069,20 @@ impl Compiler<'_> { false } - fn compile_class_def( + /// Compile the class body into a code object + /// This is similar to CPython's compiler_class_body + fn compile_class_body( &mut self, name: &str, body: &[Stmt], - decorator_list: &[Decorator], type_params: Option<&TypeParams>, - arguments: Option<&Arguments>, - ) -> CompileResult<()> { - self.prepare_decorators(decorator_list)?; - - let prev_ctx = self.ctx; - self.ctx = CompileContext { - func: FunctionContext::NoFunction, - in_class: true, - loop_data: None, - }; - - // If there are type params, we need to push a special symbol table just for them - if let Some(type_params) = type_params { - self.push_symbol_table(); - // Save current private name to restore later - let saved_private = self.code_stack.last().and_then(|info| info.private.clone()); - // Compile type parameters and store as .type_params - 
self.compile_type_params(type_params)?; - // Restore private name after type param scope - if let Some(private) = saved_private { - self.code_stack.last_mut().unwrap().private = Some(private); - } - let dot_type_params = self.name(".type_params"); - emit!(self, Instruction::StoreLocal(dot_type_params)); - } - - self.push_output(bytecode::CodeFlags::empty(), 0, 0, 0, name.to_owned()); + firstlineno: u32, + ) -> CompileResult<CodeObject> { + // 1. Enter class scope + // Use enter_scope instead of push_output to match CPython + let key = self.symbol_table_stack.len(); + self.push_symbol_table(); + self.enter_scope(name, SymbolTableType::Class, key, firstlineno)?; // Set qualname using the new method let qualname = self.set_qualname(); @@ -2109,26 +2090,35 @@ impl Compiler<'_> { // For class scopes, set u_private to the class name for name mangling self.code_stack.last_mut().unwrap().private = Some(name.to_owned()); + // 2. Set up class namespace let (doc_str, body) = split_doc(body, &self.opts); + // Load (global) __name__ and store as __module__ let dunder_name = self.name("__name__"); emit!(self, Instruction::LoadGlobal(dunder_name)); let dunder_module = self.name("__module__"); emit!(self, Instruction::StoreLocal(dunder_module)); + + // Store __qualname__ self.emit_load_const(ConstantData::Str { value: qualname.into(), }); let qualname_name = self.name("__qualname__"); emit!(self, Instruction::StoreLocal(qualname_name)); + + // Store __doc__ self.load_docstring(doc_str); let doc = self.name("__doc__"); emit!(self, Instruction::StoreLocal(doc)); - // setup annotations - if Self::find_ann(body) { - emit!(self, Instruction::SetupAnnotation); - } - // Set __type_params__ from .type_params if we have type parameters (PEP 695) + // Store __firstlineno__ (new in Python 3.12+) + self.emit_load_const(ConstantData::Integer { + value: BigInt::from(firstlineno), + }); + let firstlineno_name = self.name("__firstlineno__"); + emit!(self, Instruction::StoreLocal(firstlineno_name)); + + // 
Set __type_params__ if we have type parameters if type_params.is_some() { // Load .type_params from enclosing scope let dot_type_params = self.name(".type_params"); @@ -2139,8 +2129,15 @@ impl Compiler<'_> { emit!(self, Instruction::StoreLocal(dunder_type_params)); } + // Setup annotations if needed + if Self::find_ann(body) { + emit!(self, Instruction::SetupAnnotation); + } + + // 3. Compile the class body self.compile_statements(body)?; + // 4. Handle __classcell__ if needed let classcell_idx = self .code_stack .last_mut() @@ -2159,65 +2156,167 @@ impl Compiler<'_> { self.emit_load_const(ConstantData::None); } + // Return the class namespace self.emit_return_value(); - let code = self.exit_scope(); - self.ctx = prev_ctx; + // Exit scope and return the code object + Ok(self.exit_scope()) + } + + fn compile_class_def( + &mut self, + name: &str, + body: &[Stmt], + decorator_list: &[Decorator], + type_params: Option<&TypeParams>, + arguments: Option<&Arguments>, + ) -> CompileResult<()> { + self.prepare_decorators(decorator_list)?; - emit!(self, Instruction::LoadBuildClass); + let is_generic = type_params.is_some(); + let firstlineno = self.get_source_line_number().get().to_u32(); - let mut func_flags = bytecode::MakeFunctionFlags::empty(); + // Step 1: If generic, enter type params scope and compile type params + if is_generic { + let type_params_name = format!("<generic parameters of {name}>"); + self.push_output( + bytecode::CodeFlags::IS_OPTIMIZED | bytecode::CodeFlags::NEW_LOCALS, + 0, + 0, + 0, + type_params_name, + ); - // Prepare generic type parameters: - if type_params.is_some() { - // Load .type_params from the type params scope + // Set private name for name mangling + self.code_stack.last_mut().unwrap().private = Some(name.to_owned()); + + // Compile type parameters and store as .type_params + self.compile_type_params(type_params.unwrap())?; let dot_type_params = self.name(".type_params"); - emit!(self, Instruction::LoadNameAny(dot_type_params)); - func_flags |= 
bytecode::MakeFunctionFlags::TYPE_PARAMS; + emit!(self, Instruction::StoreLocal(dot_type_params)); } - if self.build_closure(&code) { - func_flags |= bytecode::MakeFunctionFlags::CLOSURE; - } + // Step 2: Compile class body (always done, whether generic or not) + let prev_ctx = self.ctx; + self.ctx = CompileContext { + func: FunctionContext::NoFunction, + in_class: true, + loop_data: None, + }; + let class_code = self.compile_class_body(name, body, type_params, firstlineno)?; + self.ctx = prev_ctx; - self.emit_load_const(ConstantData::Code { - code: Box::new(code), - }); - self.emit_load_const(ConstantData::Str { value: name.into() }); + // Step 3: Generate the rest of the code for the call + if is_generic { + // Still in type params scope + let dot_type_params = self.name(".type_params"); + let dot_generic_base = self.name(".generic_base"); - // Turn code object into function object: - emit!(self, Instruction::MakeFunction(func_flags)); + // Create .generic_base + emit!(self, Instruction::LoadNameAny(dot_type_params)); + emit!( + self, + Instruction::CallIntrinsic1 { + func: bytecode::IntrinsicFunction1::SubscriptGeneric + } + ); + emit!(self, Instruction::StoreLocal(dot_generic_base)); - self.emit_load_const(ConstantData::Str { value: name.into() }); + // Generate class creation code + emit!(self, Instruction::LoadBuildClass); - // For PEP 695 classes: handle Generic base creation - if type_params.is_some() { - if let Some(arguments) = arguments { - // Has explicit bases - use them as is, don't add Generic - // CPython doesn't add Generic when explicit bases are present - let call = self.compile_call_inner(2, arguments)?; - self.compile_normal_call(call); + // Set up the class function with type params + let mut func_flags = bytecode::MakeFunctionFlags::empty(); + emit!(self, Instruction::LoadNameAny(dot_type_params)); + func_flags |= bytecode::MakeFunctionFlags::TYPE_PARAMS; + + if self.build_closure(&class_code) { + func_flags |= 
bytecode::MakeFunctionFlags::CLOSURE; + } + + self.emit_load_const(ConstantData::Code { + code: Box::new(class_code), + }); + self.emit_load_const(ConstantData::Str { value: name.into() }); + emit!(self, Instruction::MakeFunction(func_flags)); + self.emit_load_const(ConstantData::Str { value: name.into() }); + + // Compile original bases + let base_count = if let Some(arguments) = arguments { + for arg in &arguments.args { + self.compile_expression(arg)?; + } + arguments.args.len() } else { - // No explicit bases, add Generic[*type_params] as the only base - // Stack currently: [function, class_name] + 0 + }; + + // Load .generic_base as the last base + emit!(self, Instruction::LoadNameAny(dot_generic_base)); - // Load .type_params for creating Generic base - let dot_type_params = self.name(".type_params"); - emit!(self, Instruction::LoadNameAny(dot_type_params)); + let nargs = 2 + u32::try_from(base_count).expect("too many base classes") + 1; // function, name, bases..., generic_base - // Call INTRINSIC_SUBSCRIPT_GENERIC to create Generic[*type_params] + // Handle keyword arguments + if let Some(arguments) = arguments + && !arguments.keywords.is_empty() + { + for keyword in &arguments.keywords { + if let Some(name) = &keyword.arg { + self.emit_load_const(ConstantData::Str { + value: name.as_str().into(), + }); + } + self.compile_expression(&keyword.value)?; + } emit!( self, - Instruction::CallIntrinsic1 { - func: bytecode::IntrinsicFunction1::SubscriptGeneric + Instruction::CallFunctionKeyword { + nargs: nargs + + u32::try_from(arguments.keywords.len()) + .expect("too many keyword arguments") } ); + } else { + emit!(self, Instruction::CallFunctionPositional { nargs }); + } + + // Return the created class + self.emit_return_value(); - // Call __build_class__ with 3 positional args: function, class_name, Generic[T] - emit!(self, Instruction::CallFunctionPositional { nargs: 3 }); + // Exit type params scope and wrap in function + let type_params_code = 
self.exit_scope(); + + // Execute the type params function + if self.build_closure(&type_params_code) { + // Should not need closure } + self.emit_load_const(ConstantData::Code { + code: Box::new(type_params_code), + }); + self.emit_load_const(ConstantData::Str { + value: format!("<generic parameters of {name}>").into(), + }); + emit!( + self, + Instruction::MakeFunction(bytecode::MakeFunctionFlags::empty()) + ); + emit!(self, Instruction::CallFunctionPositional { nargs: 0 }); } else { - // No type params, normal compilation + // Non-generic class: standard path + emit!(self, Instruction::LoadBuildClass); + + let mut func_flags = bytecode::MakeFunctionFlags::empty(); + if self.build_closure(&class_code) { + func_flags |= bytecode::MakeFunctionFlags::CLOSURE; + } + + self.emit_load_const(ConstantData::Code { + code: Box::new(class_code), + }); + self.emit_load_const(ConstantData::Str { value: name.into() }); + emit!(self, Instruction::MakeFunction(func_flags)); + self.emit_load_const(ConstantData::Str { value: name.into() }); + let call = if let Some(arguments) = arguments { self.compile_call_inner(2, arguments)? 
} else { @@ -2226,13 +2325,8 @@ impl Compiler<'_> { self.compile_normal_call(call); } - // Pop the special type params symbol table - if type_params.is_some() { - self.pop_symbol_table(); - } - + // Step 4: Apply decorators and store (common to both paths) self.apply_decorators(decorator_list); - self.store_name(name) } diff --git a/vm/src/builtins/genericalias.rs b/vm/src/builtins/genericalias.rs index fc666190bf..00bd65583d 100644 --- a/vm/src/builtins/genericalias.rs +++ b/vm/src/builtins/genericalias.rs @@ -617,19 +617,21 @@ impl Iterable for PyGenericAlias { /// This is used for PEP 695 classes to create Generic[T] from type parameters // _Py_subscript_generic pub fn subscript_generic(type_params: PyObjectRef, vm: &VirtualMachine) -> PyResult { - // Get typing.Generic type + // Get typing module and _GenericAlias let typing_module = vm.import("typing", 0)?; let generic_type = typing_module.get_attr("Generic", vm)?; - let generic_type = PyTypeRef::try_from_object(vm, generic_type)?; - // Create GenericAlias: Generic[type_params] + // Call typing._GenericAlias(Generic, type_params) + let generic_alias_class = typing_module.get_attr("_GenericAlias", vm)?; + let args = if let Ok(tuple) = type_params.try_to_ref::<PyTuple>(vm) { tuple.to_owned() } else { PyTuple::new_ref(vec![type_params], &vm.ctx) }; - Ok(PyGenericAlias::new(generic_type, args, false, vm).into_pyobject(vm)) + // Create _GenericAlias instance + generic_alias_class.call((generic_type, args.to_pyobject(vm)), vm) } pub fn init(context: &Context) { From e75aebb967266c02c2b10f9294afe2b8df878efc Mon Sep 17 00:00:00 2001 From: Shahar Naveh <50263213+ShaharNaveh@users.noreply.github.com> Date: Sat, 12 Jul 2025 14:44:34 +0300 Subject: [PATCH 026/176] Update str related tests from 3.13.5 (#5953) * Update str related tests from 3.13.5 * Apply RustPython patches * Mark new failing tests --- Lib/test/string_tests.py | 113 +++++++--- Lib/test/test_bytes.py | 239 ++++++++++++++++------ Lib/test/{test_unicode.py => 
test_str.py} | 192 ++++++++++++----- Lib/test/test_unicode_file.py | 2 +- Lib/test/test_unicode_file_functions.py | 14 +- Lib/test/test_unicode_identifiers.py | 2 +- Lib/test/test_unicodedata.py | 62 +++++- Lib/test/test_userstring.py | 3 +- 8 files changed, 464 insertions(+), 163 deletions(-) rename Lib/test/{test_unicode.py => test_str.py} (94%) diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py index 6f402513fd..3f82b515bb 100644 --- a/Lib/test/string_tests.py +++ b/Lib/test/string_tests.py @@ -8,18 +8,12 @@ from collections import UserList import random + class Sequence: def __init__(self, seq='wxyz'): self.seq = seq def __len__(self): return len(self.seq) def __getitem__(self, i): return self.seq[i] -class BadSeq1(Sequence): - def __init__(self): self.seq = [7, 'hello', 123] - def __str__(self): return '{0} {1} {2}'.format(*self.seq) - -class BadSeq2(Sequence): - def __init__(self): self.seq = ['a', 'b', 'c'] - def __len__(self): return 8 class BaseTest: # These tests are for buffers of values (bytes) and not @@ -27,7 +21,7 @@ class BaseTest: # and various string implementations # The type to be tested - # Change in subclasses to change the behaviour of fixtesttype() + # Change in subclasses to change the behaviour of fixtype() type2test = None # Whether the "contained items" of the container are integers in @@ -36,7 +30,7 @@ class BaseTest: contains_bytes = False # All tests pass their arguments to the testing methods - # as str objects. fixtesttype() can be used to propagate + # as str objects. 
fixtype() can be used to propagate # these arguments to the appropriate type def fixtype(self, obj): if isinstance(obj, str): @@ -160,6 +154,14 @@ def test_count(self): self.assertEqual(rem, 0, '%s != 0 for %s' % (rem, i)) self.assertEqual(r1, r2, '%s != %s for %s' % (r1, r2, i)) + # TODO: RUSTPYTHON; TypeError: Unexpected keyword argument count + @unittest.expectedFailure + def test_count_keyword(self): + self.assertEqual('aa'.replace('a', 'b', 0), 'aa'.replace('a', 'b', count=0)) + self.assertEqual('aa'.replace('a', 'b', 1), 'aa'.replace('a', 'b', count=1)) + self.assertEqual('aa'.replace('a', 'b', 2), 'aa'.replace('a', 'b', count=2)) + self.assertEqual('aa'.replace('a', 'b', 3), 'aa'.replace('a', 'b', count=3)) + def test_find(self): self.checkequal(0, 'abcdefghiabc', 'find', 'abc') self.checkequal(9, 'abcdefghiabc', 'find', 'abc', 1) @@ -327,11 +329,12 @@ def reference_find(p, s): for i in range(len(s)): if s.startswith(p, i): return i + if p == '' and s == '': + return 0 return -1 - rr = random.randrange - choices = random.choices - for _ in range(1000): + def check_pattern(rr): + choices = random.choices p0 = ''.join(choices('abcde', k=rr(10))) * rr(10, 20) p = p0[:len(p0) - rr(10)] # pop off some characters left = ''.join(choices('abcdef', k=rr(2000))) @@ -341,6 +344,49 @@ def reference_find(p, s): self.checkequal(reference_find(p, text), text, 'find', p) + rr = random.randrange + for _ in range(1000): + check_pattern(rr) + + # Test that empty string always work: + check_pattern(lambda *args: 0) + + def test_find_many_lengths(self): + haystack_repeats = [a * 10**e for e in range(6) for a in (1,2,5)] + haystacks = [(n, self.fixtype("abcab"*n + "da")) for n in haystack_repeats] + + needle_repeats = [a * 10**e for e in range(6) for a in (1, 3)] + needles = [(m, self.fixtype("abcab"*m + "da")) for m in needle_repeats] + + for n, haystack1 in haystacks: + haystack2 = haystack1[:-1] + for m, needle in needles: + answer1 = 5 * (n - m) if m <= n else -1 + 
self.assertEqual(haystack1.find(needle), answer1, msg=(n,m)) + self.assertEqual(haystack2.find(needle), -1, msg=(n,m)) + + def test_adaptive_find(self): + # This would be very slow for the naive algorithm, + # but str.find() should be O(n + m). + for N in 1000, 10_000, 100_000, 1_000_000: + A, B = 'a' * N, 'b' * N + haystack = A + A + B + A + A + needle = A + B + B + A + self.checkequal(-1, haystack, 'find', needle) + self.checkequal(0, haystack, 'count', needle) + self.checkequal(len(haystack), haystack + needle, 'find', needle) + self.checkequal(1, haystack + needle, 'count', needle) + + def test_find_with_memory(self): + # Test the "Skip with memory" path in the two-way algorithm. + for N in 1000, 3000, 10_000, 30_000: + needle = 'ab' * N + haystack = ('ab'*(N-1) + 'b') * 2 + self.checkequal(-1, haystack, 'find', needle) + self.checkequal(0, haystack, 'count', needle) + self.checkequal(len(haystack), haystack + needle, 'find', needle) + self.checkequal(1, haystack + needle, 'count', needle) + def test_find_shift_table_overflow(self): """When the table of 8-bit shifts overflows.""" N = 2**8 + 100 @@ -724,6 +770,18 @@ def test_replace(self): self.checkraises(TypeError, 'hello', 'replace', 42, 'h') self.checkraises(TypeError, 'hello', 'replace', 'h', 42) + def test_replace_uses_two_way_maxcount(self): + # Test that maxcount works in _two_way_count in fastsearch.h + A, B = "A"*1000, "B"*1000 + AABAA = A + A + B + A + A + ABBA = A + B + B + A + self.checkequal(AABAA + ABBA, + AABAA + ABBA, 'replace', ABBA, "ccc", 0) + self.checkequal(AABAA + "ccc", + AABAA + ABBA, 'replace', ABBA, "ccc", 1) + self.checkequal(AABAA + "ccc", + AABAA + ABBA, 'replace', ABBA, "ccc", 2) + @unittest.skip("TODO: RUSTPYTHON, may only apply to 32-bit platforms") @unittest.skipIf(sys.maxsize > (1 << 32) or struct.calcsize('P') != 4, 'only applies to 32-bit platforms') @@ -734,8 +792,6 @@ def test_replace_overflow(self): self.checkraises(OverflowError, A2_16, "replace", "A", A2_16) 
self.checkraises(OverflowError, A2_16, "replace", "AA", A2_16+A2_16) - - # Python 3.9 def test_removeprefix(self): self.checkequal('am', 'spam', 'removeprefix', 'sp') self.checkequal('spamspam', 'spamspamspam', 'removeprefix', 'spam') @@ -754,7 +810,6 @@ def test_removeprefix(self): self.checkraises(TypeError, 'hello', 'removeprefix', 'h', 42) self.checkraises(TypeError, 'hello', 'removeprefix', ("he", "l")) - # Python 3.9 def test_removesuffix(self): self.checkequal('sp', 'spam', 'removesuffix', 'am') self.checkequal('spamspam', 'spamspamspam', 'removesuffix', 'spam') @@ -1053,7 +1108,7 @@ def test_splitlines(self): self.checkraises(TypeError, 'abc', 'splitlines', 42, 42) -class CommonTest(BaseTest): +class StringLikeTest(BaseTest): # This testcase contains tests that can be used in all # stringlike classes. Currently this is str and UserString. @@ -1084,11 +1139,6 @@ def test_capitalize_nonascii(self): self.checkequal('\u019b\u1d00\u1d86\u0221\u1fb7', '\u019b\u1d00\u1d86\u0221\u1fb7', 'capitalize') - -class MixinStrUnicodeUserStringTest: - # additional tests that only work for - # stringlike objects, i.e. 
str, UserString - def test_startswith(self): self.checkequal(True, 'hello', 'startswith', 'he') self.checkequal(True, 'hello', 'startswith', 'hello') @@ -1273,8 +1323,11 @@ def test_join(self): self.checkequal(((('a' * i) + '-') * i)[:-1], '-', 'join', ('a' * i,) * i) - #self.checkequal(str(BadSeq1()), ' ', 'join', BadSeq1()) - self.checkequal('a b c', ' ', 'join', BadSeq2()) + class LiesAboutLengthSeq(Sequence): + def __init__(self): self.seq = ['a', 'b', 'c'] + def __len__(self): return 8 + + self.checkequal('a b c', ' ', 'join', LiesAboutLengthSeq()) self.checkraises(TypeError, ' ', 'join') self.checkraises(TypeError, ' ', 'join', None) @@ -1459,19 +1512,19 @@ def test_find_etc_raise_correct_error_messages(self): # issue 11828 s = 'hello' x = 'x' - self.assertRaisesRegex(TypeError, r'^find\(', s.find, + self.assertRaisesRegex(TypeError, r'^find\b', s.find, x, None, None, None) - self.assertRaisesRegex(TypeError, r'^rfind\(', s.rfind, + self.assertRaisesRegex(TypeError, r'^rfind\b', s.rfind, x, None, None, None) - self.assertRaisesRegex(TypeError, r'^index\(', s.index, + self.assertRaisesRegex(TypeError, r'^index\b', s.index, x, None, None, None) - self.assertRaisesRegex(TypeError, r'^rindex\(', s.rindex, + self.assertRaisesRegex(TypeError, r'^rindex\b', s.rindex, x, None, None, None) - self.assertRaisesRegex(TypeError, r'^count\(', s.count, + self.assertRaisesRegex(TypeError, r'^count\b', s.count, x, None, None, None) - self.assertRaisesRegex(TypeError, r'^startswith\(', s.startswith, + self.assertRaisesRegex(TypeError, r'^startswith\b', s.startswith, x, None, None, None) - self.assertRaisesRegex(TypeError, r'^endswith\(', s.endswith, + self.assertRaisesRegex(TypeError, r'^endswith\b', s.endswith, x, None, None, None) # issue #15534 diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py index 3c634b6cac..e84df546a8 100644 --- a/Lib/test/test_bytes.py +++ b/Lib/test/test_bytes.py @@ -10,6 +10,7 @@ import sys import copy import functools +import operator 
import pickle import tempfile import textwrap @@ -46,6 +47,10 @@ def __index__(self): class BaseBytesTest: + def assertTypedEqual(self, actual, expected): + self.assertIs(type(actual), type(expected)) + self.assertEqual(actual, expected) + def test_basics(self): b = self.type2test() self.assertEqual(type(b), self.type2test) @@ -737,6 +742,37 @@ def check(fmt, vals, result): check(b'%i%b %*.*b', (10, b'3', 5, 3, b'abc',), b'103 abc') check(b'%c', b'a', b'a') + class PseudoFloat: + def __init__(self, value): + self.value = float(value) + def __int__(self): + return int(self.value) + + pi = PseudoFloat(3.1415) + + exceptions_params = [ + ('%x format: an integer is required, not float', b'%x', 3.14), + ('%X format: an integer is required, not float', b'%X', 2.11), + ('%o format: an integer is required, not float', b'%o', 1.79), + ('%x format: an integer is required, not PseudoFloat', b'%x', pi), + ('%x format: an integer is required, not complex', b'%x', 3j), + ('%X format: an integer is required, not complex', b'%X', 2j), + ('%o format: an integer is required, not complex', b'%o', 1j), + ('%u format: a real number is required, not complex', b'%u', 3j), + # See https://github.com/python/cpython/issues/130928 as for why + # the exception message contains '%d' instead of '%i'. 
+ ('%d format: a real number is required, not complex', b'%i', 2j), + ('%d format: a real number is required, not complex', b'%d', 2j), + ( + r'%c requires an integer in range\(256\) or a single byte', + b'%c', pi + ), + ] + + for msg, format_bytes, value in exceptions_params: + with self.assertRaisesRegex(TypeError, msg): + operator.mod(format_bytes, value) + def test_imod(self): b = self.type2test(b'hello, %b!') orig = b @@ -995,13 +1031,13 @@ def test_translate(self): self.assertEqual(c, b'hllo') def test_sq_item(self): - _testcapi = import_helper.import_module('_testcapi') + _testlimitedcapi = import_helper.import_module('_testlimitedcapi') obj = self.type2test((42,)) with self.assertRaises(IndexError): - _testcapi.sequence_getitem(obj, -2) + _testlimitedcapi.sequence_getitem(obj, -2) with self.assertRaises(IndexError): - _testcapi.sequence_getitem(obj, 1) - self.assertEqual(_testcapi.sequence_getitem(obj, 0), 42) + _testlimitedcapi.sequence_getitem(obj, 1) + self.assertEqual(_testlimitedcapi.sequence_getitem(obj, 0), 42) class BytesTest(BaseBytesTest, unittest.TestCase): @@ -1031,36 +1067,63 @@ def test_buffer_is_readonly(self): self.assertRaises(TypeError, f.readinto, b"") def test_custom(self): - class A: - def __bytes__(self): - return b'abc' - self.assertEqual(bytes(A()), b'abc') - class A: pass - self.assertRaises(TypeError, bytes, A()) - class A: - def __bytes__(self): - return None - self.assertRaises(TypeError, bytes, A()) - class A: + self.assertEqual(bytes(BytesSubclass(b'abc')), b'abc') + self.assertEqual(BytesSubclass(OtherBytesSubclass(b'abc')), + BytesSubclass(b'abc')) + self.assertEqual(bytes(WithBytes(b'abc')), b'abc') + self.assertEqual(BytesSubclass(WithBytes(b'abc')), BytesSubclass(b'abc')) + + class NoBytes: pass + self.assertRaises(TypeError, bytes, NoBytes()) + self.assertRaises(TypeError, bytes, WithBytes('abc')) + self.assertRaises(TypeError, bytes, WithBytes(None)) + class IndexWithBytes: def __bytes__(self): return b'a' def 
__index__(self): return 42 - self.assertEqual(bytes(A()), b'a') + self.assertEqual(bytes(IndexWithBytes()), b'a') # Issue #25766 - class A(str): + class StrWithBytes(str): + def __new__(cls, value): + self = str.__new__(cls, '\u20ac') + self.value = value + return self def __bytes__(self): - return b'abc' - self.assertEqual(bytes(A('\u20ac')), b'abc') - self.assertEqual(bytes(A('\u20ac'), 'iso8859-15'), b'\xa4') + return self.value + self.assertEqual(bytes(StrWithBytes(b'abc')), b'abc') + self.assertEqual(bytes(StrWithBytes(b'abc'), 'iso8859-15'), b'\xa4') + self.assertEqual(bytes(StrWithBytes(BytesSubclass(b'abc'))), b'abc') + self.assertEqual(BytesSubclass(StrWithBytes(b'abc')), BytesSubclass(b'abc')) + self.assertEqual(BytesSubclass(StrWithBytes(b'abc'), 'iso8859-15'), + BytesSubclass(b'\xa4')) + self.assertEqual(BytesSubclass(StrWithBytes(BytesSubclass(b'abc'))), + BytesSubclass(b'abc')) + self.assertEqual(BytesSubclass(StrWithBytes(OtherBytesSubclass(b'abc'))), + BytesSubclass(b'abc')) # Issue #24731 - class A: + self.assertTypedEqual(bytes(WithBytes(BytesSubclass(b'abc'))), BytesSubclass(b'abc')) + self.assertTypedEqual(BytesSubclass(WithBytes(BytesSubclass(b'abc'))), + BytesSubclass(b'abc')) + self.assertTypedEqual(BytesSubclass(WithBytes(OtherBytesSubclass(b'abc'))), + BytesSubclass(b'abc')) + + class BytesWithBytes(bytes): + def __new__(cls, value): + self = bytes.__new__(cls, b'\xa4') + self.value = value + return self def __bytes__(self): - return OtherBytesSubclass(b'abc') - self.assertEqual(bytes(A()), b'abc') - self.assertIs(type(bytes(A())), OtherBytesSubclass) - self.assertEqual(BytesSubclass(A()), b'abc') - self.assertIs(type(BytesSubclass(A())), BytesSubclass) + return self.value + self.assertTypedEqual(bytes(BytesWithBytes(b'abc')), b'abc') + self.assertTypedEqual(BytesSubclass(BytesWithBytes(b'abc')), + BytesSubclass(b'abc')) + self.assertTypedEqual(bytes(BytesWithBytes(BytesSubclass(b'abc'))), + BytesSubclass(b'abc')) + 
self.assertTypedEqual(BytesSubclass(BytesWithBytes(BytesSubclass(b'abc'))), + BytesSubclass(b'abc')) + self.assertTypedEqual(BytesSubclass(BytesWithBytes(OtherBytesSubclass(b'abc'))), + BytesSubclass(b'abc')) # Test PyBytes_FromFormat() def test_from_format(self): @@ -1233,6 +1296,8 @@ class SubBytes(bytes): class ByteArrayTest(BaseBytesTest, unittest.TestCase): type2test = bytearray + _testlimitedcapi = import_helper.import_module('_testlimitedcapi') + def test_getitem_error(self): b = bytearray(b'python') msg = "bytearray indices must be integers or slices" @@ -1325,47 +1390,73 @@ def by(s): self.assertEqual(re.findall(br"\w+", b), [by("Hello"), by("world")]) def test_setitem(self): - b = bytearray([1, 2, 3]) - b[1] = 100 - self.assertEqual(b, bytearray([1, 100, 3])) - b[-1] = 200 - self.assertEqual(b, bytearray([1, 100, 200])) - b[0] = Indexable(10) - self.assertEqual(b, bytearray([10, 100, 200])) - try: - b[3] = 0 - self.fail("Didn't raise IndexError") - except IndexError: - pass - try: - b[-10] = 0 - self.fail("Didn't raise IndexError") - except IndexError: - pass - try: - b[0] = 256 - self.fail("Didn't raise ValueError") - except ValueError: - pass - try: - b[0] = Indexable(-1) - self.fail("Didn't raise ValueError") - except ValueError: - pass - try: - b[0] = None - self.fail("Didn't raise TypeError") - except TypeError: - pass + def setitem_as_mapping(b, i, val): + b[i] = val + + def setitem_as_sequence(b, i, val): + self._testlimitedcapi.sequence_setitem(b, i, val) + + def do_tests(setitem): + b = bytearray([1, 2, 3]) + setitem(b, 1, 100) + self.assertEqual(b, bytearray([1, 100, 3])) + setitem(b, -1, 200) + self.assertEqual(b, bytearray([1, 100, 200])) + setitem(b, 0, Indexable(10)) + self.assertEqual(b, bytearray([10, 100, 200])) + try: + setitem(b, 3, 0) + self.fail("Didn't raise IndexError") + except IndexError: + pass + try: + setitem(b, -10, 0) + self.fail("Didn't raise IndexError") + except IndexError: + pass + try: + setitem(b, 0, 256) + 
self.fail("Didn't raise ValueError") + except ValueError: + pass + try: + setitem(b, 0, Indexable(-1)) + self.fail("Didn't raise ValueError") + except ValueError: + pass + try: + setitem(b, 0, object()) + self.fail("Didn't raise TypeError") + except TypeError: + pass + + with self.subTest("tp_as_mapping"): + do_tests(setitem_as_mapping) + + with self.subTest("tp_as_sequence"): + do_tests(setitem_as_sequence) def test_delitem(self): - b = bytearray(range(10)) - del b[0] - self.assertEqual(b, bytearray(range(1, 10))) - del b[-1] - self.assertEqual(b, bytearray(range(1, 9))) - del b[4] - self.assertEqual(b, bytearray([1, 2, 3, 4, 6, 7, 8])) + def del_as_mapping(b, i): + del b[i] + + def del_as_sequence(b, i): + self._testlimitedcapi.sequence_delitem(b, i) + + def do_tests(delete): + b = bytearray(range(10)) + delete(b, 0) + self.assertEqual(b, bytearray(range(1, 10))) + delete(b, -1) + self.assertEqual(b, bytearray(range(1, 9))) + delete(b, 4) + self.assertEqual(b, bytearray([1, 2, 3, 4, 6, 7, 8])) + + with self.subTest("tp_as_mapping"): + do_tests(del_as_mapping) + + with self.subTest("tp_as_sequence"): + do_tests(del_as_sequence) def test_setslice(self): b = bytearray(range(10)) @@ -1558,6 +1649,13 @@ def test_extend(self): a = bytearray(b'') a.extend([Indexable(ord('a'))]) self.assertEqual(a, b'a') + a = bytearray(b'abc') + self.assertRaisesRegex(TypeError, # Override for string. + "expected iterable of integers; got: 'str'", + a.extend, 'def') + self.assertRaisesRegex(TypeError, # But not for others. 
+ "can't extend bytearray with float", + a.extend, 1.0) def test_remove(self): b = bytearray(b'hello') @@ -1747,6 +1845,8 @@ def test_repeat_after_setslice(self): self.assertEqual(b3, b'xcxcxc') def test_mutating_index(self): + # See gh-91153 + class Boom: def __index__(self): b.clear() @@ -1758,10 +1858,9 @@ def __index__(self): b[0] = Boom() with self.subTest("tp_as_sequence"): - _testcapi = import_helper.import_module('_testcapi') b = bytearray(b'Now you see me...') with self.assertRaises(IndexError): - _testcapi.sequence_setitem(b, 0, Boom()) + self._testlimitedcapi.sequence_setitem(b, 0, Boom()) class AssortedBytesTest(unittest.TestCase): @@ -2060,6 +2159,12 @@ class BytesSubclass(bytes): class OtherBytesSubclass(bytes): pass +class WithBytes: + def __init__(self, value): + self.value = value + def __bytes__(self): + return self.value + class ByteArraySubclassTest(SubclassTest, unittest.TestCase): basetype = bytearray type2test = ByteArraySubclass diff --git a/Lib/test/test_unicode.py b/Lib/test/test_str.py similarity index 94% rename from Lib/test/test_unicode.py rename to Lib/test/test_str.py index 1a8a8f7ee9..ef2d211a61 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_str.py @@ -7,6 +7,7 @@ """ import _string import codecs +import datetime import itertools import operator import pickle @@ -55,8 +56,22 @@ def duplicate_string(text): class StrSubclass(str): pass -class UnicodeTest(string_tests.CommonTest, - string_tests.MixinStrUnicodeUserStringTest, +class OtherStrSubclass(str): + pass + +class WithStr: + def __init__(self, value): + self.value = value + def __str__(self): + return self.value + +class WithRepr: + def __init__(self, value): + self.value = value + def __repr__(self): + return self.value + +class StrTest(string_tests.StringLikeTest, string_tests.MixinStrUnicodeTest, unittest.TestCase): @@ -84,6 +99,10 @@ def __repr__(self): self.assertEqual(realresult, result) self.assertTrue(object is not realresult) + def assertTypedEqual(self, 
actual, expected): + self.assertIs(type(actual), type(expected)) + self.assertEqual(actual, expected) + def test_literals(self): self.assertEqual('\xff', '\u00ff') self.assertEqual('\uffff', '\U0000ffff') @@ -93,6 +112,8 @@ def test_literals(self): # raw strings should not have unicode escapes self.assertNotEqual(r"\u0020", " ") + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_ascii(self): self.assertEqual(ascii('abc'), "'abc'") self.assertEqual(ascii('ab\\c'), "'ab\\\\c'") @@ -128,10 +149,13 @@ def test_ascii(self): self.assertEqual(ascii("\U00010000" * 39 + "\uffff" * 4096), ascii("\U00010000" * 39 + "\uffff" * 4096)) - class WrongRepr: - def __repr__(self): - return b'byte-repr' - self.assertRaises(TypeError, ascii, WrongRepr()) + self.assertTypedEqual(ascii('\U0001f40d'), r"'\U0001f40d'") + self.assertTypedEqual(ascii(StrSubclass('abc')), "'abc'") + self.assertTypedEqual(ascii(WithRepr('<abc>')), '<abc>') + self.assertTypedEqual(ascii(WithRepr(StrSubclass('<abc>'))), StrSubclass('<abc>')) + self.assertTypedEqual(ascii(WithRepr('<\U0001f40d>')), r'<\U0001f40d>') + self.assertTypedEqual(ascii(WithRepr(StrSubclass('<\U0001f40d>'))), r'<\U0001f40d>') + self.assertRaises(TypeError, ascii, WithRepr(b'byte-repr')) def test_repr(self): # Test basic sanity of repr() @@ -169,10 +193,13 @@ def test_repr(self): self.assertEqual(repr("\U00010000" * 39 + "\uffff" * 4096), repr("\U00010000" * 39 + "\uffff" * 4096)) - class WrongRepr: - def __repr__(self): - return b'byte-repr' - self.assertRaises(TypeError, repr, WrongRepr()) + self.assertTypedEqual(repr('\U0001f40d'), "'\U0001f40d'") + self.assertTypedEqual(repr(StrSubclass('abc')), "'abc'") + self.assertTypedEqual(repr(WithRepr('<abc>')), '<abc>') + self.assertTypedEqual(repr(WithRepr(StrSubclass('<abc>'))), StrSubclass('<abc>')) + self.assertTypedEqual(repr(WithRepr('<\U0001f40d>')), '<\U0001f40d>') + self.assertTypedEqual(repr(WithRepr(StrSubclass('<\U0001f40d>'))), StrSubclass('<\U0001f40d>')) + self.assertRaises(TypeError, repr, 
WithRepr(b'byte-repr')) def test_iterators(self): # Make sure unicode objects have an __iter__ method @@ -213,7 +240,7 @@ def test_pickle_iterator(self): self.assertEqual(case, pickled) def test_count(self): - string_tests.CommonTest.test_count(self) + string_tests.StringLikeTest.test_count(self) # check mixed argument types self.checkequalnofix(3, 'aaa', 'count', 'a') self.checkequalnofix(0, 'aaa', 'count', 'b') @@ -243,7 +270,7 @@ class MyStr(str): self.checkequal(3, MyStr('aaa'), 'count', 'a') def test_find(self): - string_tests.CommonTest.test_find(self) + string_tests.StringLikeTest.test_find(self) # test implementation details of the memchr fast path self.checkequal(100, 'a' * 100 + '\u0102', 'find', '\u0102') self.checkequal(-1, 'a' * 100 + '\u0102', 'find', '\u0201') @@ -288,7 +315,7 @@ def test_find(self): self.checkequal(-1, '\u0102' * 100, 'find', '\u0102\U00100304') def test_rfind(self): - string_tests.CommonTest.test_rfind(self) + string_tests.StringLikeTest.test_rfind(self) # test implementation details of the memrchr fast path self.checkequal(0, '\u0102' + 'a' * 100 , 'rfind', '\u0102') self.checkequal(-1, '\u0102' + 'a' * 100 , 'rfind', '\u0201') @@ -329,7 +356,7 @@ def test_rfind(self): self.checkequal(-1, '\u0102' * 100, 'rfind', '\U00100304\u0102') def test_index(self): - string_tests.CommonTest.test_index(self) + string_tests.StringLikeTest.test_index(self) self.checkequalnofix(0, 'abcdefghiabc', 'index', '') self.checkequalnofix(3, 'abcdefghiabc', 'index', 'def') self.checkequalnofix(0, 'abcdefghiabc', 'index', 'abc') @@ -353,7 +380,7 @@ def test_index(self): self.assertRaises(ValueError, ('\u0102' * 100).index, '\u0102\U00100304') def test_rindex(self): - string_tests.CommonTest.test_rindex(self) + string_tests.StringLikeTest.test_rindex(self) self.checkequalnofix(12, 'abcdefghiabc', 'rindex', '') self.checkequalnofix(3, 'abcdefghiabc', 'rindex', 'def') self.checkequalnofix(9, 'abcdefghiabc', 'rindex', 'abc') @@ -449,7 +476,7 @@ def 
test_maketrans_translate(self): self.assertRaises(TypeError, 'abababc'.translate, 'abc', 'xyz') def test_split(self): - string_tests.CommonTest.test_split(self) + string_tests.StringLikeTest.test_split(self) # test mixed kinds for left, right in ('ba', '\u0101\u0100', '\U00010301\U00010300'): @@ -466,7 +493,7 @@ def test_split(self): left + delim * 2 + right, 'split', delim *2) def test_rsplit(self): - string_tests.CommonTest.test_rsplit(self) + string_tests.StringLikeTest.test_rsplit(self) # test mixed kinds for left, right in ('ba', 'юё', '\u0101\u0100', '\U00010301\U00010300'): left *= 9 @@ -486,7 +513,7 @@ def test_rsplit(self): left + right, 'rsplit', None) def test_partition(self): - string_tests.MixinStrUnicodeUserStringTest.test_partition(self) + string_tests.StringLikeTest.test_partition(self) # test mixed kinds self.checkequal(('ABCDEFGH', '', ''), 'ABCDEFGH', 'partition', '\u4200') for left, right in ('ba', '\u0101\u0100', '\U00010301\U00010300'): @@ -503,7 +530,7 @@ def test_partition(self): left + delim * 2 + right, 'partition', delim * 2) def test_rpartition(self): - string_tests.MixinStrUnicodeUserStringTest.test_rpartition(self) + string_tests.StringLikeTest.test_rpartition(self) # test mixed kinds self.checkequal(('', '', 'ABCDEFGH'), 'ABCDEFGH', 'rpartition', '\u4200') for left, right in ('ba', '\u0101\u0100', '\U00010301\U00010300'): @@ -520,7 +547,7 @@ def test_rpartition(self): left + delim * 2 + right, 'rpartition', delim * 2) def test_join(self): - string_tests.MixinStrUnicodeUserStringTest.test_join(self) + string_tests.StringLikeTest.test_join(self) class MyWrapper: def __init__(self, sval): self.sval = sval @@ -548,7 +575,7 @@ def test_join_overflow(self): self.assertRaises(OverflowError, ''.join, seq) def test_replace(self): - string_tests.CommonTest.test_replace(self) + string_tests.StringLikeTest.test_replace(self) # method call forwarded from str implementation because of unicode argument self.checkequalnofix('one@two!three!', 
'one!two!three!', 'replace', '!', '@', 1) @@ -831,6 +858,15 @@ def test_isprintable(self): self.assertTrue('\U0001F46F'.isprintable()) self.assertFalse('\U000E0020'.isprintable()) + @support.requires_resource('cpu') + def test_isprintable_invariant(self): + for codepoint in range(sys.maxunicode + 1): + char = chr(codepoint) + category = unicodedata.category(char) + self.assertEqual(char.isprintable(), + category[0] not in ('C', 'Z') + or char == ' ') + def test_surrogates(self): for s in ('a\uD800b\uDFFF', 'a\uDFFFb\uD800', 'a\uD800b\uDFFFa', 'a\uDFFFb\uD800a'): @@ -859,7 +895,7 @@ def test_surrogates(self): def test_lower(self): - string_tests.CommonTest.test_lower(self) + string_tests.StringLikeTest.test_lower(self) self.assertEqual('\U00010427'.lower(), '\U0001044F') self.assertEqual('\U00010427\U00010427'.lower(), '\U0001044F\U0001044F') @@ -890,7 +926,7 @@ def test_casefold(self): self.assertEqual('\u00b5'.casefold(), '\u03bc') def test_upper(self): - string_tests.CommonTest.test_upper(self) + string_tests.StringLikeTest.test_upper(self) self.assertEqual('\U0001044F'.upper(), '\U00010427') self.assertEqual('\U0001044F\U0001044F'.upper(), '\U00010427\U00010427') @@ -909,7 +945,7 @@ def test_upper(self): # TODO: RUSTPYTHON @unittest.expectedFailure def test_capitalize(self): - string_tests.CommonTest.test_capitalize(self) + string_tests.StringLikeTest.test_capitalize(self) self.assertEqual('\U0001044F'.capitalize(), '\U00010427') self.assertEqual('\U0001044F\U0001044F'.capitalize(), '\U00010427\U0001044F') @@ -947,7 +983,7 @@ def test_title(self): # TODO: RUSTPYTHON @unittest.expectedFailure def test_swapcase(self): - string_tests.CommonTest.test_swapcase(self) + string_tests.StringLikeTest.test_swapcase(self) self.assertEqual('\U0001044F'.swapcase(), '\U00010427') self.assertEqual('\U00010427'.swapcase(), '\U0001044F') self.assertEqual('\U0001044F\U0001044F'.swapcase(), @@ -973,7 +1009,7 @@ def test_swapcase(self): self.assertEqual('\u1fd2'.swapcase(), 
'\u0399\u0308\u0300') def test_center(self): - string_tests.CommonTest.test_center(self) + string_tests.StringLikeTest.test_center(self) self.assertEqual('x'.center(2, '\U0010FFFF'), 'x\U0010FFFF') self.assertEqual('x'.center(3, '\U0010FFFF'), @@ -1483,7 +1519,7 @@ def __format__(self, spec): # TODO: RUSTPYTHON @unittest.expectedFailure def test_formatting(self): - string_tests.MixinStrUnicodeUserStringTest.test_formatting(self) + string_tests.StringLikeTest.test_formatting(self) # Testing Unicode formatting strings... self.assertEqual("%s, %s" % ("abc", "abc"), 'abc, abc') self.assertEqual("%s, %s, %i, %f, %5.2f" % ("abc", "abc", 1, 2, 3), 'abc, abc, 1, 2.000000, 3.00') @@ -1659,7 +1695,7 @@ def test_startswith_endswith_errors(self): self.assertIn('str', exc) self.assertIn('tuple', exc) - @support.run_with_locale('LC_ALL', 'de_DE', 'fr_FR') + @support.run_with_locale('LC_ALL', 'de_DE', 'fr_FR', '') def test_format_float(self): # should not format with a comma, but always with C locale self.assertEqual('1.0', '%.1f' % 1.0) @@ -1730,8 +1766,6 @@ def __str__(self): 'character buffers are decoded to unicode' ) - self.assertRaises(TypeError, str, 42, 42, 42) - # TODO: RUSTPYTHON @unittest.expectedFailure def test_constructor_keyword_args(self): @@ -1910,6 +1944,12 @@ def test_utf8_decode_invalid_sequences(self): self.assertRaises(UnicodeDecodeError, (b'\xF4'+cb+b'\xBF\xBF').decode, 'utf-8') + def test_issue127903(self): + # gh-127903: ``_copy_characters`` crashes on DEBUG builds when + # there is nothing to copy. 
+ d = datetime.datetime(2013, 11, 10, 14, 20, 59) + self.assertEqual(d.strftime('%z'), '') + def test_issue8271(self): # Issue #8271: during the decoding of an invalid UTF-8 byte sequence, # only the start byte and the continuation byte(s) are now considered @@ -2396,28 +2436,37 @@ def test_ucs4(self): @unittest.expectedFailure def test_conversion(self): # Make sure __str__() works properly - class ObjectToStr: - def __str__(self): - return "foo" - - class StrSubclassToStr(str): - def __str__(self): - return "foo" - - class StrSubclassToStrSubclass(str): - def __new__(cls, content=""): - return str.__new__(cls, 2*content) - def __str__(self): + class StrWithStr(str): + def __new__(cls, value): + self = str.__new__(cls, "") + self.value = value return self + def __str__(self): + return self.value - self.assertEqual(str(ObjectToStr()), "foo") - self.assertEqual(str(StrSubclassToStr("bar")), "foo") - s = str(StrSubclassToStrSubclass("foo")) - self.assertEqual(s, "foofoo") - self.assertIs(type(s), StrSubclassToStrSubclass) - s = StrSubclass(StrSubclassToStrSubclass("foo")) - self.assertEqual(s, "foofoo") - self.assertIs(type(s), StrSubclass) + self.assertTypedEqual(str(WithStr('abc')), 'abc') + self.assertTypedEqual(str(WithStr(StrSubclass('abc'))), StrSubclass('abc')) + self.assertTypedEqual(StrSubclass(WithStr('abc')), StrSubclass('abc')) + self.assertTypedEqual(StrSubclass(WithStr(StrSubclass('abc'))), + StrSubclass('abc')) + self.assertTypedEqual(StrSubclass(WithStr(OtherStrSubclass('abc'))), + StrSubclass('abc')) + + self.assertTypedEqual(str(StrWithStr('abc')), 'abc') + self.assertTypedEqual(str(StrWithStr(StrSubclass('abc'))), StrSubclass('abc')) + self.assertTypedEqual(StrSubclass(StrWithStr('abc')), StrSubclass('abc')) + self.assertTypedEqual(StrSubclass(StrWithStr(StrSubclass('abc'))), + StrSubclass('abc')) + self.assertTypedEqual(StrSubclass(StrWithStr(OtherStrSubclass('abc'))), + StrSubclass('abc')) + + self.assertTypedEqual(str(WithRepr('')), '') + 
self.assertTypedEqual(str(WithRepr(StrSubclass(''))), StrSubclass('')) + self.assertTypedEqual(StrSubclass(WithRepr('')), StrSubclass('')) + self.assertTypedEqual(StrSubclass(WithRepr(StrSubclass(''))), + StrSubclass('')) + self.assertTypedEqual(StrSubclass(WithRepr(OtherStrSubclass(''))), + StrSubclass('')) def test_unicode_repr(self): class s1: @@ -2652,6 +2701,49 @@ def test_check_encoding_errors(self): proc = assert_python_failure('-X', 'dev', '-c', code) self.assertEqual(proc.rc, 10, proc) + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_str_invalid_call(self): + # too many args + with self.assertRaisesRegex(TypeError, r"str expected at most 3 arguments, got 4"): + str("too", "many", "argu", "ments") + with self.assertRaisesRegex(TypeError, r"str expected at most 3 arguments, got 4"): + str(1, "", "", 1) + + # no such kw arg + with self.assertRaisesRegex(TypeError, r"str\(\) got an unexpected keyword argument 'test'"): + str(test=1) + + # 'encoding' must be str + with self.assertRaisesRegex(TypeError, r"str\(\) argument 'encoding' must be str, not int"): + str(1, 1) + with self.assertRaisesRegex(TypeError, r"str\(\) argument 'encoding' must be str, not int"): + str(1, encoding=1) + with self.assertRaisesRegex(TypeError, r"str\(\) argument 'encoding' must be str, not bytes"): + str(b"x", b"ascii") + with self.assertRaisesRegex(TypeError, r"str\(\) argument 'encoding' must be str, not bytes"): + str(b"x", encoding=b"ascii") + + # 'errors' must be str + with self.assertRaisesRegex(TypeError, r"str\(\) argument 'encoding' must be str, not int"): + str(1, 1, 1) + with self.assertRaisesRegex(TypeError, r"str\(\) argument 'errors' must be str, not int"): + str(1, errors=1) + with self.assertRaisesRegex(TypeError, r"str\(\) argument 'errors' must be str, not int"): + str(1, "", errors=1) + with self.assertRaisesRegex(TypeError, r"str\(\) argument 'errors' must be str, not bytes"): + str(b"x", "ascii", b"strict") + with self.assertRaisesRegex(TypeError, 
r"str\(\) argument 'errors' must be str, not bytes"): + str(b"x", "ascii", errors=b"strict") + + # both positional and kwarg + with self.assertRaisesRegex(TypeError, r"argument for str\(\) given by name \('encoding'\) and position \(2\)"): + str(b"x", "utf-8", encoding="ascii") + with self.assertRaisesRegex(TypeError, r"str\(\) takes at most 3 arguments \(4 given\)"): + str(b"x", "utf-8", "ignore", encoding="ascii") + with self.assertRaisesRegex(TypeError, r"str\(\) takes at most 3 arguments \(4 given\)"): + str(b"x", "utf-8", "strict", errors="ignore") + class StringModuleTest(unittest.TestCase): def test_formatter_parser(self): diff --git a/Lib/test/test_unicode_file.py b/Lib/test/test_unicode_file.py index 80c22c6cdd..fe25bfe9f8 100644 --- a/Lib/test/test_unicode_file.py +++ b/Lib/test/test_unicode_file.py @@ -110,7 +110,7 @@ def _test_single(self, filename): os.unlink(filename) self.assertTrue(not os.path.exists(filename)) # and again with os.open. - f = os.open(filename, os.O_CREAT) + f = os.open(filename, os.O_CREAT | os.O_WRONLY) os.close(f) try: self._do_single(filename) diff --git a/Lib/test/test_unicode_file_functions.py b/Lib/test/test_unicode_file_functions.py index 47619c8807..25c16e3a0b 100644 --- a/Lib/test/test_unicode_file_functions.py +++ b/Lib/test/test_unicode_file_functions.py @@ -5,7 +5,7 @@ import unittest import warnings from unicodedata import normalize -from test.support import os_helper +from test.support import is_apple, os_helper from test import support @@ -23,13 +23,13 @@ '10_\u1fee\u1ffd', ] -# Mac OS X decomposes Unicode names, using Normal Form D. +# Apple platforms decompose Unicode names, using Normal Form D. # http://developer.apple.com/mac/library/qa/qa2001/qa1173.html # "However, most volume formats do not follow the exact specification for # these normal forms. For example, HFS Plus uses a variant of Normal Form D # in which U+2000 through U+2FFF, U+F900 through U+FAFF, and U+2F800 through # U+2FAFF are not decomposed." 
-if sys.platform != 'darwin': +if not is_apple: filenames.extend([ # Specific code points: NFC(fn), NFD(fn), NFKC(fn) and NFKD(fn) all different '11_\u0385\u03d3\u03d4', @@ -119,11 +119,11 @@ def test_open(self): os.stat(name) self._apply_failure(os.listdir, name, self._listdir_failure) - # Skip the test on darwin, because darwin does normalize the filename to + # Skip the test on Apple platforms, because they don't normalize the filename to # NFD (a variant of Unicode NFD form). Normalize the filename to NFC, NFKC, # NFKD in Python is useless, because darwin will normalize it later and so # open(), os.stat(), etc. don't raise any exception. - @unittest.skipIf(sys.platform == 'darwin', 'irrelevant test on Mac OS X') + @unittest.skipIf(is_apple, 'irrelevant test on Apple platforms') @unittest.skipIf( support.is_emscripten or support.is_wasi, "test fails on Emscripten/WASI when host platform is macOS." @@ -142,10 +142,10 @@ def test_normalize(self): self._apply_failure(os.remove, name) self._apply_failure(os.listdir, name) - # Skip the test on darwin, because darwin uses a normalization different + # Skip the test on Apple platforms, because they use a normalization different # than Python NFD normalization: filenames are different even if we use # Python NFD normalization. 
- @unittest.skipIf(sys.platform == 'darwin', 'irrelevant test on Mac OS X') + @unittest.skipIf(is_apple, 'irrelevant test on Apple platforms') def test_listdir(self): sf0 = set(self.files) with warnings.catch_warnings(): diff --git a/Lib/test/test_unicode_identifiers.py b/Lib/test/test_unicode_identifiers.py index d7a0ece253..60cfdaabe8 100644 --- a/Lib/test/test_unicode_identifiers.py +++ b/Lib/test/test_unicode_identifiers.py @@ -21,7 +21,7 @@ def test_non_bmp_normalized(self): @unittest.expectedFailure def test_invalid(self): try: - from test import badsyntax_3131 + from test.tokenizedata import badsyntax_3131 except SyntaxError as err: self.assertEqual(str(err), "invalid character '€' (U+20AC) (badsyntax_3131.py, line 2)") diff --git a/Lib/test/test_unicodedata.py b/Lib/test/test_unicodedata.py index 29da4a25a3..7f49c1690f 100644 --- a/Lib/test/test_unicodedata.py +++ b/Lib/test/test_unicodedata.py @@ -11,15 +11,20 @@ import sys import unicodedata import unittest -from test.support import (open_urlresource, requires_resource, script_helper, - cpython_only, check_disallow_instantiation, - ResourceDenied) +from test.support import ( + open_urlresource, + requires_resource, + script_helper, + cpython_only, + check_disallow_instantiation, + force_not_colorized, +) class UnicodeMethodsTest(unittest.TestCase): # update this, if the database changes - expectedchecksum = '4739770dd4d0e5f1b1677accfc3552ed3c8ef326' + expectedchecksum = '63aa77dcb36b0e1df082ee2a6071caeda7f0955e' # TODO: RUSTPYTHON @unittest.expectedFailure @@ -74,7 +79,8 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest): # Update this if the database changes. Make sure to do a full rebuild # (e.g. 'make distclean && make') to get the correct checksum. 
- expectedchecksum = '98d602e1f69d5c5bb8a5910c40bbbad4e18e8370' + expectedchecksum = '232affd2a50ec4bd69d2482aa0291385cbdefaba' + # TODO: RUSTPYTHON @unittest.expectedFailure @requires_resource('cpu') @@ -94,6 +100,8 @@ def test_function_checksum(self): self.db.decomposition(char), str(self.db.mirrored(char)), str(self.db.combining(char)), + unicodedata.east_asian_width(char), + self.db.name(char, ""), ] h.update(''.join(data).encode("ascii")) result = h.hexdigest() @@ -106,6 +114,28 @@ def test_name_inverse_lookup(self): if looked_name := self.db.name(char, None): self.assertEqual(self.db.lookup(looked_name), char) + def test_no_names_in_pua(self): + puas = [*range(0xe000, 0xf8ff), + *range(0xf0000, 0xfffff), + *range(0x100000, 0x10ffff)] + for i in puas: + char = chr(i) + self.assertRaises(ValueError, self.db.name, char) + + # TODO: RUSTPYTHON; LookupError: undefined character name 'LATIN SMLL LETR A' + @unittest.expectedFailure + def test_lookup_nonexistant(self): + # just make sure that lookup can fail + for nonexistant in [ + "LATIN SMLL LETR A", + "OPEN HANDS SIGHS", + "DREGS", + "HANDBUG", + "MODIFIER LETTER CYRILLIC SMALL QUESTION MARK", + "???", + ]: + self.assertRaises(KeyError, self.db.lookup, nonexistant) + # TODO: RUSTPYTHON @unittest.expectedFailure def test_digit(self): @@ -245,6 +275,25 @@ def test_east_asian_width(self): self.assertEqual(eaw('\u2010'), 'A') self.assertEqual(eaw('\U00020000'), 'W') + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_east_asian_width_unassigned(self): + eaw = self.db.east_asian_width + # unassigned + for char in '\u0530\u0ecf\u10c6\u20fc\uaaca\U000107bd\U000115f2': + self.assertEqual(eaw(char), 'N') + self.assertIs(self.db.name(char, None), None) + + # unassigned but reserved for CJK + for char in '\uFA6E\uFADA\U0002A6E0\U0002FA20\U0003134B\U0003FFFD': + self.assertEqual(eaw(char), 'W') + self.assertIs(self.db.name(char, None), None) + + # private use areas + for char in 
'\uE000\uF800\U000F0000\U000FFFEE\U00100000\U0010FFF0': + self.assertEqual(eaw(char), 'A') + self.assertIs(self.db.name(char, None), None) + # TODO: RUSTPYTHON @unittest.expectedFailure def test_east_asian_width_9_0_changes(self): @@ -260,6 +309,7 @@ def test_disallow_instantiation(self): # TODO: RUSTPYTHON @unittest.expectedFailure + @force_not_colorized def test_failed_import_during_compiling(self): # Issue 4367 # Decoding \N escapes requires the unicodedata module. If it can't be @@ -322,6 +372,7 @@ def test_ucd_510(self): self.assertTrue("\u1d79".upper()=='\ua77d') self.assertTrue(".".upper()=='.') + @requires_resource('cpu') def test_bug_5828(self): self.assertEqual("\u1d79".lower(), "\u1d79") # Only U+0000 should have U+0000 as its upper/lower/titlecase variant @@ -364,6 +415,7 @@ def unistr(data): return "".join([chr(x) for x in data]) @requires_resource('network') + @requires_resource('cpu') def test_normalization(self): TESTDATAFILE = "NormalizationTest.txt" TESTDATAURL = f"http://www.pythontest.net/unicode/{unicodedata.unidata_version}/{TESTDATAFILE}" diff --git a/Lib/test/test_userstring.py b/Lib/test/test_userstring.py index 51b4f6041e..74df52f541 100644 --- a/Lib/test/test_userstring.py +++ b/Lib/test/test_userstring.py @@ -7,8 +7,7 @@ from collections import UserString class UserStringTest( - string_tests.CommonTest, - string_tests.MixinStrUnicodeUserStringTest, + string_tests.StringLikeTest, unittest.TestCase ): From ac20b00e26ab4721aa277a092db186bd1d5754c8 Mon Sep 17 00:00:00 2001 From: Shahar Naveh <50263213+ShaharNaveh@users.noreply.github.com> Date: Sat, 12 Jul 2025 16:43:47 +0300 Subject: [PATCH 027/176] `str.replace` support count as keyword arg (#5954) --- Lib/test/string_tests.py | 2 -- vm/src/builtins/str.rs | 35 +++++++++++++++++++++++++++-------- 2 files changed, 27 insertions(+), 10 deletions(-) diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py index 3f82b515bb..c5831c47fc 100644 --- a/Lib/test/string_tests.py +++ 
b/Lib/test/string_tests.py @@ -154,8 +154,6 @@ def test_count(self): self.assertEqual(rem, 0, '%s != 0 for %s' % (rem, i)) self.assertEqual(r1, r2, '%s != %s for %s' % (r1, r2, i)) - # TODO: RUSTPYTHON; TypeError: Unexpected keyword argument count - @unittest.expectedFailure def test_count_keyword(self): self.assertEqual('aa'.replace('a', 'b', 0), 'aa'.replace('a', 'b', count=0)) self.assertEqual('aa'.replace('a', 'b', 1), 'aa'.replace('a', 'b', count=1)) diff --git a/vm/src/builtins/str.rs b/vm/src/builtins/str.rs index f822a124ed..9f86da3da0 100644 --- a/vm/src/builtins/str.rs +++ b/vm/src/builtins/str.rs @@ -1018,20 +1018,27 @@ impl PyStr { } #[pymethod] - fn replace(&self, old: PyStrRef, new: PyStrRef, count: OptionalArg) -> Wtf8Buf { + fn replace(&self, args: ReplaceArgs) -> Wtf8Buf { + use std::cmp::Ordering; + let s = self.as_wtf8(); - match count { - OptionalArg::Present(max_count) if max_count >= 0 => { - if max_count == 0 || (s.is_empty() && !old.is_empty()) { - // nothing to do; return the original bytes + let ReplaceArgs { old, new, count } = args; + + match count.cmp(&0) { + Ordering::Less => s.replace(old.as_wtf8(), new.as_wtf8()), + Ordering::Equal => s.to_owned(), + Ordering::Greater => { + let s_is_empty = s.is_empty(); + let old_is_empty = old.is_empty(); + + if s_is_empty && !old_is_empty { s.to_owned() - } else if s.is_empty() && old.is_empty() { + } else if s_is_empty && old_is_empty { new.as_wtf8().to_owned() } else { - s.replacen(old.as_wtf8(), new.as_wtf8(), max_count as usize) + s.replacen(old.as_wtf8(), new.as_wtf8(), count as usize) } } - _ => s.replace(old.as_wtf8(), new.as_wtf8()), } } @@ -1685,6 +1692,18 @@ impl FindArgs { } } +#[derive(FromArgs)] +struct ReplaceArgs { + #[pyarg(positional)] + old: PyStrRef, + + #[pyarg(positional)] + new: PyStrRef, + + #[pyarg(any, default = -1)] + count: isize, +} + pub fn init(ctx: &Context) { PyStr::extend_class(ctx, ctx.types.str_type); From e21ec550d4cfa65e85f1ebe2696c1998b8865790 Mon Sep 17 
00:00:00 2001 From: "Jeong, YunWon" <69878+youknowone@users.noreply.github.com> Date: Sun, 13 Jul 2025 00:22:41 +0900 Subject: [PATCH 028/176] Fix set___name__ and set___qualname__ deadlock (#5956) --- Lib/test/test_descr.py | 1 - vm/src/builtins/type.rs | 18 ++++++++++++++++-- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_descr.py b/Lib/test/test_descr.py index b0414d5b00..7698c340c8 100644 --- a/Lib/test/test_descr.py +++ b/Lib/test/test_descr.py @@ -4261,7 +4261,6 @@ class C(object): C.__name__ = 'D.E' self.assertEqual((C.__module__, C.__name__), (mod, 'D.E')) - @unittest.skip("TODO: RUSTPYTHON, rustpython hang") def test_evil_type_name(self): # A badly placed Py_DECREF in type_set_name led to arbitrary code # execution while the type structure was not in a sane state, and a diff --git a/vm/src/builtins/type.rs b/vm/src/builtins/type.rs index 806d32f906..ded0ae4b3a 100644 --- a/vm/src/builtins/type.rs +++ b/vm/src/builtins/type.rs @@ -680,7 +680,15 @@ impl PyType { .heaptype_ext .as_ref() .expect("HEAPTYPE should have heaptype_ext"); - *heap_type.qualname.write() = str_value; + + // Use std::mem::replace to swap the new value in and get the old value out, + // then drop the old value after releasing the lock + let _old_qualname = { + let mut qualname_guard = heap_type.qualname.write(); + std::mem::replace(&mut *qualname_guard, str_value) + }; + // old_qualname is dropped here, outside the lock scope + Ok(()) } @@ -837,7 +845,13 @@ impl PyType { return Err(vm.new_value_error("type name must not contain null characters")); } - *self.heaptype_ext.as_ref().unwrap().name.write() = name; + // Use std::mem::replace to swap the new value in and get the old value out, + // then drop the old value after releasing the lock (similar to CPython's Py_SETREF) + let _old_name = { + let mut name_guard = self.heaptype_ext.as_ref().unwrap().name.write(); + std::mem::replace(&mut *name_guard, name) + }; + // old_name is dropped here, outside the lock 
scope Ok(()) } From 52d46326de2de4da49d503d954382c2e3343790d Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" <69878+youknowone@users.noreply.github.com> Date: Sun, 13 Jul 2025 01:00:15 +0900 Subject: [PATCH 029/176] make_closure (#5955) --- compiler/codegen/src/compile.rs | 200 +++++++++++++++----------------- 1 file changed, 95 insertions(+), 105 deletions(-) diff --git a/compiler/codegen/src/compile.rs b/compiler/codegen/src/compile.rs index 0bfe4adb41..96f6f9a3c5 100644 --- a/compiler/codegen/src/compile.rs +++ b/compiler/codegen/src/compile.rs @@ -1959,24 +1959,13 @@ impl Compiler<'_> { ); } - if self.build_closure(&code) { - func_flags |= bytecode::MakeFunctionFlags::CLOSURE; - } - // Pop the special type params symbol table if type_params.is_some() { self.pop_symbol_table(); } - self.emit_load_const(ConstantData::Code { - code: Box::new(code), - }); - self.emit_load_const(ConstantData::Str { - value: qualname.into(), - }); - - // Turn code object into function object: - emit!(self, Instruction::MakeFunction(func_flags)); + // Create function with closure + self.make_closure(code, &qualname, func_flags)?; if let Some(value) = doc_str { emit!(self, Instruction::Duplicate); @@ -1993,44 +1982,86 @@ impl Compiler<'_> { self.store_name(name) } - fn build_closure(&mut self, code: &CodeObject) -> bool { - if code.freevars.is_empty() { - return false; - } - for var in &*code.freevars { - let table = self.symbol_table_stack.last().unwrap(); - let symbol = unwrap_internal( + /// Loads closure variables if needed and creates a function object + // = compiler_make_closure + fn make_closure( + &mut self, + code: CodeObject, + qualname: &str, + mut flags: bytecode::MakeFunctionFlags, + ) -> CompileResult<()> { + // Handle free variables (closure) + if !code.freevars.is_empty() { + // Build closure tuple by loading free variables + for var in &code.freevars { + let table = self.symbol_table_stack.last().unwrap(); + let symbol = match table.lookup(var) { + Some(s) => s, + None 
=> { + return Err(self.error(CodegenErrorType::SyntaxError(format!( + "compiler_make_closure: cannot find symbol '{var}'", + )))); + } + }; + + let parent_code = self.code_stack.last().unwrap(); + let vars = match symbol.scope { + SymbolScope::Free => &parent_code.metadata.freevars, + SymbolScope::Cell => &parent_code.metadata.cellvars, + SymbolScope::TypeParams => &parent_code.metadata.cellvars, + _ if symbol.flags.contains(SymbolFlags::FREE_CLASS) => { + &parent_code.metadata.freevars + } + _ => { + return Err(self.error(CodegenErrorType::SyntaxError(format!( + "compiler_make_closure: invalid scope for '{var}'", + )))); + } + }; + + let idx = match vars.get_index_of(var) { + Some(i) => i, + None => { + return Err(self.error(CodegenErrorType::SyntaxError(format!( + "compiler_make_closure: cannot find '{var}' in parent vars", + )))); + } + }; + + let idx = if let SymbolScope::Free = symbol.scope { + idx + parent_code.metadata.cellvars.len() + } else { + idx + }; + + emit!(self, Instruction::LoadClosure(idx.to_u32())); + } + + // Build tuple of closure variables + emit!( self, - table - .lookup(var) - .ok_or_else(|| InternalError::MissingSymbol(var.to_owned())), - ); - let parent_code = self.code_stack.last().unwrap(); - let vars = match symbol.scope { - SymbolScope::Free => &parent_code.metadata.freevars, - SymbolScope::Cell => &parent_code.metadata.cellvars, - SymbolScope::TypeParams => &parent_code.metadata.cellvars, - _ if symbol.flags.contains(SymbolFlags::FREE_CLASS) => { - &parent_code.metadata.freevars + Instruction::BuildTuple { + size: code.freevars.len().to_u32(), } - x => unreachable!( - "var {} in a {:?} should be free or cell but it's {:?}", - var, table.typ, x - ), - }; - let mut idx = vars.get_index_of(var).unwrap(); - if let SymbolScope::Free = symbol.scope { - idx += parent_code.metadata.cellvars.len(); - } - emit!(self, Instruction::LoadClosure(idx.to_u32())) + ); + + flags |= bytecode::MakeFunctionFlags::CLOSURE; } - emit!( - self, - 
Instruction::BuildTuple { - size: code.freevars.len().to_u32(), - } - ); - true + + // Load code object + self.emit_load_const(ConstantData::Code { + code: Box::new(code), + }); + + // Load qualified name + self.emit_load_const(ConstantData::Str { + value: qualname.into(), + }); + + // Make function with proper flags + emit!(self, Instruction::MakeFunction(flags)); + + Ok(()) } // Python/compile.c find_ann @@ -2230,15 +2261,8 @@ impl Compiler<'_> { emit!(self, Instruction::LoadNameAny(dot_type_params)); func_flags |= bytecode::MakeFunctionFlags::TYPE_PARAMS; - if self.build_closure(&class_code) { - func_flags |= bytecode::MakeFunctionFlags::CLOSURE; - } - - self.emit_load_const(ConstantData::Code { - code: Box::new(class_code), - }); - self.emit_load_const(ConstantData::Str { value: name.into() }); - emit!(self, Instruction::MakeFunction(func_flags)); + // Create class function with closure + self.make_closure(class_code, name, func_flags)?; self.emit_load_const(ConstantData::Str { value: name.into() }); // Compile original bases @@ -2287,34 +2311,19 @@ impl Compiler<'_> { let type_params_code = self.exit_scope(); // Execute the type params function - if self.build_closure(&type_params_code) { - // Should not need closure - } - self.emit_load_const(ConstantData::Code { - code: Box::new(type_params_code), - }); - self.emit_load_const(ConstantData::Str { - value: format!("").into(), - }); - emit!( - self, - Instruction::MakeFunction(bytecode::MakeFunctionFlags::empty()) - ); + let type_params_name = format!(""); + self.make_closure( + type_params_code, + &type_params_name, + bytecode::MakeFunctionFlags::empty(), + )?; emit!(self, Instruction::CallFunctionPositional { nargs: 0 }); } else { // Non-generic class: standard path emit!(self, Instruction::LoadBuildClass); - let mut func_flags = bytecode::MakeFunctionFlags::empty(); - if self.build_closure(&class_code) { - func_flags |= bytecode::MakeFunctionFlags::CLOSURE; - } - - self.emit_load_const(ConstantData::Code { - 
code: Box::new(class_code), - }); - self.emit_load_const(ConstantData::Str { value: name.into() }); - emit!(self, Instruction::MakeFunction(func_flags)); + // Create class function with closure + self.make_closure(class_code, name, bytecode::MakeFunctionFlags::empty())?; self.emit_load_const(ConstantData::Str { value: name.into() }); let call = if let Some(arguments) = arguments { @@ -4026,7 +4035,7 @@ impl Compiler<'_> { let prev_ctx = self.ctx; let name = "".to_owned(); - let mut func_flags = self + let func_flags = self .enter_function(&name, parameters.as_deref().unwrap_or(&Default::default()))?; // Set qualname for lambda @@ -4046,15 +4055,9 @@ impl Compiler<'_> { self.compile_expression(body)?; self.emit_return_value(); let code = self.exit_scope(); - if self.build_closure(&code) { - func_flags |= bytecode::MakeFunctionFlags::CLOSURE; - } - self.emit_load_const(ConstantData::Code { - code: Box::new(code), - }); - self.emit_load_const(ConstantData::Str { value: name.into() }); - // Turn code object into function object: - emit!(self, Instruction::MakeFunction(func_flags)); + + // Create lambda function with closure + self.make_closure(code, &name, func_flags)?; self.ctx = prev_ctx; } @@ -4598,21 +4601,8 @@ impl Compiler<'_> { self.ctx = prev_ctx; - let mut func_flags = bytecode::MakeFunctionFlags::empty(); - if self.build_closure(&code) { - func_flags |= bytecode::MakeFunctionFlags::CLOSURE; - } - - // List comprehension code: - self.emit_load_const(ConstantData::Code { - code: Box::new(code), - }); - - // List comprehension function name: - self.emit_load_const(ConstantData::Str { value: name.into() }); - - // Turn code object into function object: - emit!(self, Instruction::MakeFunction(func_flags)); + // Create comprehension function with closure + self.make_closure(code, name, bytecode::MakeFunctionFlags::empty())?; // Evaluate iterated item: self.compile_expression(&generators[0].iter)?; From 16aaad7aebf83ed08aa1735bb96f3aca2b3e84d5 Mon Sep 17 00:00:00 
2001 From: "Jeong, YunWon" <69878+youknowone@users.noreply.github.com> Date: Sun, 13 Jul 2025 10:20:07 +0900 Subject: [PATCH 030/176] PyTraceback Constructor (#5958) --- Lib/test/test_raise.py | 2 -- vm/src/builtins/traceback.rs | 19 ++++++++++++++++--- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_raise.py b/Lib/test/test_raise.py index 94f42c84f1..3ada08f7dc 100644 --- a/Lib/test/test_raise.py +++ b/Lib/test/test_raise.py @@ -270,8 +270,6 @@ def test_attrs(self): tb.tb_next = new_tb self.assertIs(tb.tb_next, new_tb) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_constructor(self): other_tb = get_tb() frame = sys._getframe() diff --git a/vm/src/builtins/traceback.rs b/vm/src/builtins/traceback.rs index 33063132c6..05e9944e09 100644 --- a/vm/src/builtins/traceback.rs +++ b/vm/src/builtins/traceback.rs @@ -1,8 +1,9 @@ use rustpython_common::lock::PyMutex; -use super::PyType; +use super::{PyType, PyTypeRef}; use crate::{ - Context, Py, PyPayload, PyRef, class::PyClassImpl, frame::FrameRef, source::LineNumber, + Context, Py, PyPayload, PyRef, PyResult, VirtualMachine, class::PyClassImpl, frame::FrameRef, + source::LineNumber, types::Constructor, }; #[pyclass(module = false, name = "traceback", traverse)] @@ -25,7 +26,7 @@ impl PyPayload for PyTraceback { } } -#[pyclass] +#[pyclass(with(Constructor))] impl PyTraceback { pub const fn new( next: Option>, @@ -67,6 +68,18 @@ impl PyTraceback { } } +impl Constructor for PyTraceback { + type Args = (Option>, FrameRef, u32, usize); + + fn py_new(cls: PyTypeRef, args: Self::Args, vm: &VirtualMachine) -> PyResult { + let (next, frame, lasti, lineno) = args; + let lineno = LineNumber::new(lineno) + .ok_or_else(|| vm.new_value_error("lineno must be positive".to_owned()))?; + let tb = PyTraceback::new(next, frame, lasti, lineno); + tb.into_ref_with_type(vm, cls).map(Into::into) + } +} + impl PyTracebackRef { pub fn iter(&self) -> impl Iterator { std::iter::successors(Some(self.clone()), 
|tb| tb.next.lock().clone()) From 8ab7aa2c6b5b40bb5bc907ec5c8071e18f705804 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" <69878+youknowone@users.noreply.github.com> Date: Sun, 13 Jul 2025 13:12:03 +0900 Subject: [PATCH 031/176] type.__dict__ (#5957) --- extra_tests/snippets/builtin_type.py | 12 ++++++++++ vm/src/builtins/type.rs | 33 ++++++++++++++++------------ 2 files changed, 31 insertions(+), 14 deletions(-) diff --git a/extra_tests/snippets/builtin_type.py b/extra_tests/snippets/builtin_type.py index 923028f2cd..820ee36615 100644 --- a/extra_tests/snippets/builtin_type.py +++ b/extra_tests/snippets/builtin_type.py @@ -595,3 +595,15 @@ def my_repr_func(): # https://github.com/RustPython/RustPython/issues/3100 assert issubclass(types.BuiltinMethodType, types.BuiltinFunctionType) + +assert type.__dict__["__dict__"].__objclass__ is type +assert ( + type(type(type.__dict__["__dict__"]).__objclass__).__name__ == "member_descriptor" +) + + +class A(type): + pass + + +assert "__dict__" not in A.__dict__ diff --git a/vm/src/builtins/type.rs b/vm/src/builtins/type.rs index ded0ae4b3a..1e18d6fd63 100644 --- a/vm/src/builtins/type.rs +++ b/vm/src/builtins/type.rs @@ -936,9 +936,9 @@ impl Constructor for PyType { return Err(vm.new_value_error("type name must not contain null characters")); } - let (metatype, base, bases) = if bases.is_empty() { + let (metatype, base, bases, base_is_type) = if bases.is_empty() { let base = vm.ctx.types.object_type.to_owned(); - (metatype, base.clone(), vec![base]) + (metatype, base.clone(), vec![base], false) } else { let bases = bases .iter() @@ -972,8 +972,9 @@ impl Constructor for PyType { }; let base = best_base(&bases, vm)?; + let base_is_type = base.is(vm.ctx.types.type_type); - (metatype, base.to_owned(), bases) + (metatype, base.to_owned(), bases, base_is_type) }; let qualname = dict @@ -1021,17 +1022,21 @@ impl Constructor for PyType { // All *classes* should have a dict. 
Exceptions are *instances* of // classes that define __slots__ and instances of built-in classes // (with exceptions, e.g function) - let __dict__ = identifier!(vm, __dict__); - attributes.entry(__dict__).or_insert_with(|| { - vm.ctx - .new_static_getset( - "__dict__", - vm.ctx.types.type_type, - subtype_get_dict, - subtype_set_dict, - ) - .into() - }); + // Also, type subclasses don't need their own __dict__ descriptor + // since they inherit it from type + if !base_is_type { + let __dict__ = identifier!(vm, __dict__); + attributes.entry(__dict__).or_insert_with(|| { + vm.ctx + .new_static_getset( + "__dict__", + vm.ctx.types.type_type, + subtype_get_dict, + subtype_set_dict, + ) + .into() + }); + } // TODO: Flags is currently initialized with HAS_DICT. Should be // updated when __slots__ are supported (toggling the flag off if From 04d8d69a8c040a956c130d62a8f4595c18f958c5 Mon Sep 17 00:00:00 2001 From: Ashwin Naren Date: Sun, 13 Jul 2025 22:19:33 -0700 Subject: [PATCH 032/176] upgrade parts of test.support (#5686) --- Lib/test/support/hypothesis_helper.py | 7 --- Lib/test/support/smtpd.py | 46 +++++++++--------- Lib/test/support/venv.py | 70 +++++++++++++++++++++++++++ 3 files changed, 93 insertions(+), 30 deletions(-) create mode 100644 Lib/test/support/venv.py diff --git a/Lib/test/support/hypothesis_helper.py b/Lib/test/support/hypothesis_helper.py index 40f58a2f59..db93eea5e9 100644 --- a/Lib/test/support/hypothesis_helper.py +++ b/Lib/test/support/hypothesis_helper.py @@ -5,13 +5,6 @@ except ImportError: from . import _hypothesis_stubs as hypothesis else: - # Regrtest changes to use a tempdir as the working directory, so we have - # to tell Hypothesis to use the original in order to persist the database. 
- from .os_helper import SAVEDCWD - from hypothesis.configuration import set_hypothesis_home_dir - - set_hypothesis_home_dir(os.path.join(SAVEDCWD, ".hypothesis")) - # When using the real Hypothesis, we'll configure it to ignore occasional # slow tests (avoiding flakiness from random VM slowness in CI). hypothesis.settings.register_profile( diff --git a/Lib/test/support/smtpd.py b/Lib/test/support/smtpd.py index ec4e7d2f4c..6052232ec2 100644 --- a/Lib/test/support/smtpd.py +++ b/Lib/test/support/smtpd.py @@ -180,122 +180,122 @@ def _set_rset_state(self): @property def __server(self): warn("Access to __server attribute on SMTPChannel is deprecated, " - "use 'smtp_server' instead", DeprecationWarning, 2) + "use 'smtp_server' instead", DeprecationWarning, 2) return self.smtp_server @__server.setter def __server(self, value): warn("Setting __server attribute on SMTPChannel is deprecated, " - "set 'smtp_server' instead", DeprecationWarning, 2) + "set 'smtp_server' instead", DeprecationWarning, 2) self.smtp_server = value @property def __line(self): warn("Access to __line attribute on SMTPChannel is deprecated, " - "use 'received_lines' instead", DeprecationWarning, 2) + "use 'received_lines' instead", DeprecationWarning, 2) return self.received_lines @__line.setter def __line(self, value): warn("Setting __line attribute on SMTPChannel is deprecated, " - "set 'received_lines' instead", DeprecationWarning, 2) + "set 'received_lines' instead", DeprecationWarning, 2) self.received_lines = value @property def __state(self): warn("Access to __state attribute on SMTPChannel is deprecated, " - "use 'smtp_state' instead", DeprecationWarning, 2) + "use 'smtp_state' instead", DeprecationWarning, 2) return self.smtp_state @__state.setter def __state(self, value): warn("Setting __state attribute on SMTPChannel is deprecated, " - "set 'smtp_state' instead", DeprecationWarning, 2) + "set 'smtp_state' instead", DeprecationWarning, 2) self.smtp_state = value @property def 
__greeting(self): warn("Access to __greeting attribute on SMTPChannel is deprecated, " - "use 'seen_greeting' instead", DeprecationWarning, 2) + "use 'seen_greeting' instead", DeprecationWarning, 2) return self.seen_greeting @__greeting.setter def __greeting(self, value): warn("Setting __greeting attribute on SMTPChannel is deprecated, " - "set 'seen_greeting' instead", DeprecationWarning, 2) + "set 'seen_greeting' instead", DeprecationWarning, 2) self.seen_greeting = value @property def __mailfrom(self): warn("Access to __mailfrom attribute on SMTPChannel is deprecated, " - "use 'mailfrom' instead", DeprecationWarning, 2) + "use 'mailfrom' instead", DeprecationWarning, 2) return self.mailfrom @__mailfrom.setter def __mailfrom(self, value): warn("Setting __mailfrom attribute on SMTPChannel is deprecated, " - "set 'mailfrom' instead", DeprecationWarning, 2) + "set 'mailfrom' instead", DeprecationWarning, 2) self.mailfrom = value @property def __rcpttos(self): warn("Access to __rcpttos attribute on SMTPChannel is deprecated, " - "use 'rcpttos' instead", DeprecationWarning, 2) + "use 'rcpttos' instead", DeprecationWarning, 2) return self.rcpttos @__rcpttos.setter def __rcpttos(self, value): warn("Setting __rcpttos attribute on SMTPChannel is deprecated, " - "set 'rcpttos' instead", DeprecationWarning, 2) + "set 'rcpttos' instead", DeprecationWarning, 2) self.rcpttos = value @property def __data(self): warn("Access to __data attribute on SMTPChannel is deprecated, " - "use 'received_data' instead", DeprecationWarning, 2) + "use 'received_data' instead", DeprecationWarning, 2) return self.received_data @__data.setter def __data(self, value): warn("Setting __data attribute on SMTPChannel is deprecated, " - "set 'received_data' instead", DeprecationWarning, 2) + "set 'received_data' instead", DeprecationWarning, 2) self.received_data = value @property def __fqdn(self): warn("Access to __fqdn attribute on SMTPChannel is deprecated, " - "use 'fqdn' instead", 
DeprecationWarning, 2) + "use 'fqdn' instead", DeprecationWarning, 2) return self.fqdn @__fqdn.setter def __fqdn(self, value): warn("Setting __fqdn attribute on SMTPChannel is deprecated, " - "set 'fqdn' instead", DeprecationWarning, 2) + "set 'fqdn' instead", DeprecationWarning, 2) self.fqdn = value @property def __peer(self): warn("Access to __peer attribute on SMTPChannel is deprecated, " - "use 'peer' instead", DeprecationWarning, 2) + "use 'peer' instead", DeprecationWarning, 2) return self.peer @__peer.setter def __peer(self, value): warn("Setting __peer attribute on SMTPChannel is deprecated, " - "set 'peer' instead", DeprecationWarning, 2) + "set 'peer' instead", DeprecationWarning, 2) self.peer = value @property def __conn(self): warn("Access to __conn attribute on SMTPChannel is deprecated, " - "use 'conn' instead", DeprecationWarning, 2) + "use 'conn' instead", DeprecationWarning, 2) return self.conn @__conn.setter def __conn(self, value): warn("Setting __conn attribute on SMTPChannel is deprecated, " - "set 'conn' instead", DeprecationWarning, 2) + "set 'conn' instead", DeprecationWarning, 2) self.conn = value @property def __addr(self): warn("Access to __addr attribute on SMTPChannel is deprecated, " - "use 'addr' instead", DeprecationWarning, 2) + "use 'addr' instead", DeprecationWarning, 2) return self.addr @__addr.setter def __addr(self, value): warn("Setting __addr attribute on SMTPChannel is deprecated, " - "set 'addr' instead", DeprecationWarning, 2) + "set 'addr' instead", DeprecationWarning, 2) self.addr = value # Overrides base class for convenience. 
@@ -339,7 +339,7 @@ def found_terminator(self): command = line[:i].upper() arg = line[i+1:].strip() max_sz = (self.command_size_limits[command] - if self.extended_smtp else self.command_size_limit) + if self.extended_smtp else self.command_size_limit) if sz > max_sz: self.push('500 Error: line too long') return diff --git a/Lib/test/support/venv.py b/Lib/test/support/venv.py new file mode 100644 index 0000000000..78e6a51ec1 --- /dev/null +++ b/Lib/test/support/venv.py @@ -0,0 +1,70 @@ +import contextlib +import logging +import os +import subprocess +import shlex +import sys +import sysconfig +import tempfile +import venv + + +class VirtualEnvironment: + def __init__(self, prefix, **venv_create_args): + self._logger = logging.getLogger(self.__class__.__name__) + venv.create(prefix, **venv_create_args) + self._prefix = prefix + self._paths = sysconfig.get_paths( + scheme='venv', + vars={'base': self.prefix}, + expand=True, + ) + + @classmethod + @contextlib.contextmanager + def from_tmpdir(cls, *, prefix=None, dir=None, **venv_create_args): + delete = not bool(os.environ.get('PYTHON_TESTS_KEEP_VENV')) + with tempfile.TemporaryDirectory(prefix=prefix, dir=dir, delete=delete) as tmpdir: + yield cls(tmpdir, **venv_create_args) + + @property + def prefix(self): + return self._prefix + + @property + def paths(self): + return self._paths + + @property + def interpreter(self): + return os.path.join(self.paths['scripts'], os.path.basename(sys.executable)) + + def _format_output(self, name, data, indent='\t'): + if not data: + return indent + f'{name}: (none)' + if len(data.splitlines()) == 1: + return indent + f'{name}: {data}' + else: + prefixed_lines = '\n'.join(indent + '> ' + line for line in data.splitlines()) + return indent + f'{name}:\n' + prefixed_lines + + def run(self, *args, **subprocess_args): + if subprocess_args.get('shell'): + raise ValueError('Running the subprocess in shell mode is not supported.') + default_args = { + 'capture_output': True, + 'check': 
True, + } + try: + result = subprocess.run([self.interpreter, *args], **default_args | subprocess_args) + except subprocess.CalledProcessError as e: + if e.returncode != 0: + self._logger.error( + f'Interpreter returned non-zero exit status {e.returncode}.\n' + + self._format_output('COMMAND', shlex.join(e.cmd)) + '\n' + + self._format_output('STDOUT', e.stdout.decode()) + '\n' + + self._format_output('STDERR', e.stderr.decode()) + '\n' + ) + raise + else: + return result From ed8d7157d9bf9827329306a39ecb77481ea2f749 Mon Sep 17 00:00:00 2001 From: Shahar Naveh <50263213+ShaharNaveh@users.noreply.github.com> Date: Mon, 14 Jul 2025 08:20:28 +0300 Subject: [PATCH 033/176] Update `test_{complex,float}.py` from 3.13.5 (#5961) --- Lib/test/support/numbers.py | 80 +++++++ Lib/test/test_complex.py | 409 +++++++++++++++++++----------------- Lib/test/test_float.py | 8 +- 3 files changed, 298 insertions(+), 199 deletions(-) create mode 100644 Lib/test/support/numbers.py diff --git a/Lib/test/support/numbers.py b/Lib/test/support/numbers.py new file mode 100644 index 0000000000..d5dbb41ace --- /dev/null +++ b/Lib/test/support/numbers.py @@ -0,0 +1,80 @@ +# These are shared with test_tokenize and other test modules. +# +# Note: since several test cases filter out floats by looking for "e" and ".", +# don't add hexadecimal literals that contain "e" or "E". 
+VALID_UNDERSCORE_LITERALS = [ + '0_0_0', + '4_2', + '1_0000_0000', + '0b1001_0100', + '0xffff_ffff', + '0o5_7_7', + '1_00_00.5', + '1_00_00.5e5', + '1_00_00e5_1', + '1e1_0', + '.1_4', + '.1_4e1', + '0b_0', + '0x_f', + '0o_5', + '1_00_00j', + '1_00_00.5j', + '1_00_00e5_1j', + '.1_4j', + '(1_2.5+3_3j)', + '(.5_6j)', +] +INVALID_UNDERSCORE_LITERALS = [ + # Trailing underscores: + '0_', + '42_', + '1.4j_', + '0x_', + '0b1_', + '0xf_', + '0o5_', + '0 if 1_Else 1', + # Underscores in the base selector: + '0_b0', + '0_xf', + '0_o5', + # Old-style octal, still disallowed: + '0_7', + '09_99', + # Multiple consecutive underscores: + '4_______2', + '0.1__4', + '0.1__4j', + '0b1001__0100', + '0xffff__ffff', + '0x___', + '0o5__77', + '1e1__0', + '1e1__0j', + # Underscore right before a dot: + '1_.4', + '1_.4j', + # Underscore right after a dot: + '1._4', + '1._4j', + '._5', + '._5j', + # Underscore right after a sign: + '1.0e+_1', + '1.0e+_1j', + # Underscore right before j: + '1.4_j', + '1.4e5_j', + # Underscore right before e: + '1_e1', + '1.4_e1', + '1.4_e1j', + # Underscore right after e: + '1e_1', + '1.4e_1', + '1.4e_1j', + # Complex cases with parens: + '(1+1.5_j_)', + '(1+1.5_j)', +] diff --git a/Lib/test/test_complex.py b/Lib/test/test_complex.py index dd3c4f281a..86d075de8c 100644 --- a/Lib/test/test_complex.py +++ b/Lib/test/test_complex.py @@ -1,15 +1,19 @@ import unittest import sys from test import support -from test.test_grammar import (VALID_UNDERSCORE_LITERALS, - INVALID_UNDERSCORE_LITERALS) +from test.support.testcase import ComplexesAreIdenticalMixin +from test.support.numbers import ( + VALID_UNDERSCORE_LITERALS, + INVALID_UNDERSCORE_LITERALS, +) from random import random -from math import atan2, isnan, copysign +from math import isnan, copysign import operator INF = float("inf") NAN = float("nan") +DBL_MAX = sys.float_info.max # These tests ensure that complex math does the right thing ZERO_DIVISION = ( @@ -20,7 +24,28 @@ (1, 0+0j), ) -class 
ComplexTest(unittest.TestCase): +class WithIndex: + def __init__(self, value): + self.value = value + def __index__(self): + return self.value + +class WithFloat: + def __init__(self, value): + self.value = value + def __float__(self): + return self.value + +class ComplexSubclass(complex): + pass + +class WithComplex: + def __init__(self, value): + self.value = value + def __complex__(self): + return self.value + +class ComplexTest(ComplexesAreIdenticalMixin, unittest.TestCase): def assertAlmostEqual(self, a, b): if isinstance(a, complex): @@ -49,29 +74,6 @@ def assertCloseAbs(self, x, y, eps=1e-9): # check that relative difference < eps self.assertTrue(abs((x-y)/y) < eps) - def assertFloatsAreIdentical(self, x, y): - """assert that floats x and y are identical, in the sense that: - (1) both x and y are nans, or - (2) both x and y are infinities, with the same sign, or - (3) both x and y are zeros, with the same sign, or - (4) x and y are both finite and nonzero, and x == y - - """ - msg = 'floats {!r} and {!r} are not identical' - - if isnan(x) or isnan(y): - if isnan(x) and isnan(y): - return - elif x == y: - if x != 0.0: - return - # both zero; check that signs match - elif copysign(1.0, x) == copysign(1.0, y): - return - else: - msg += ': zeros have different signs' - self.fail(msg.format(x, y)) - def assertClose(self, x, y, eps=1e-9): """Return true iff complexes x and y "are close".""" self.assertCloseAbs(x.real, y.real, eps) @@ -303,6 +305,11 @@ def test_pow(self): except OverflowError: pass + # gh-113841: possible undefined division by 0 in _Py_c_pow() + x, y = 9j, 33j**3 + with self.assertRaises(OverflowError): + x**y + def test_pow_with_small_integer_exponents(self): # Check that small integer exponents are handled identically # regardless of their type. 
@@ -340,138 +347,93 @@ def test_boolcontext(self): def test_conjugate(self): self.assertClose(complex(5.3, 9.8).conjugate(), 5.3-9.8j) + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_constructor(self): - class NS: - def __init__(self, value): self.value = value - def __complex__(self): return self.value - self.assertEqual(complex(NS(1+10j)), 1+10j) - self.assertRaises(TypeError, complex, NS(None)) - self.assertRaises(TypeError, complex, {}) - self.assertRaises(TypeError, complex, NS(1.5)) - self.assertRaises(TypeError, complex, NS(1)) - self.assertRaises(TypeError, complex, object()) - self.assertRaises(TypeError, complex, NS(4.25+0.5j), object()) - - self.assertAlmostEqual(complex("1+10j"), 1+10j) - self.assertAlmostEqual(complex(10), 10+0j) - self.assertAlmostEqual(complex(10.0), 10+0j) - self.assertAlmostEqual(complex(10), 10+0j) - self.assertAlmostEqual(complex(10+0j), 10+0j) - self.assertAlmostEqual(complex(1,10), 1+10j) - self.assertAlmostEqual(complex(1,10), 1+10j) - self.assertAlmostEqual(complex(1,10.0), 1+10j) - self.assertAlmostEqual(complex(1,10), 1+10j) - self.assertAlmostEqual(complex(1,10), 1+10j) - self.assertAlmostEqual(complex(1,10.0), 1+10j) - self.assertAlmostEqual(complex(1.0,10), 1+10j) - self.assertAlmostEqual(complex(1.0,10), 1+10j) - self.assertAlmostEqual(complex(1.0,10.0), 1+10j) - self.assertAlmostEqual(complex(3.14+0j), 3.14+0j) - self.assertAlmostEqual(complex(3.14), 3.14+0j) - self.assertAlmostEqual(complex(314), 314.0+0j) - self.assertAlmostEqual(complex(314), 314.0+0j) - self.assertAlmostEqual(complex(3.14+0j, 0j), 3.14+0j) - self.assertAlmostEqual(complex(3.14, 0.0), 3.14+0j) - self.assertAlmostEqual(complex(314, 0), 314.0+0j) - self.assertAlmostEqual(complex(314, 0), 314.0+0j) - self.assertAlmostEqual(complex(0j, 3.14j), -3.14+0j) - self.assertAlmostEqual(complex(0.0, 3.14j), -3.14+0j) - self.assertAlmostEqual(complex(0j, 3.14), 3.14j) - self.assertAlmostEqual(complex(0.0, 3.14), 3.14j) - 
self.assertAlmostEqual(complex("1"), 1+0j) - self.assertAlmostEqual(complex("1j"), 1j) - self.assertAlmostEqual(complex(), 0) - self.assertAlmostEqual(complex("-1"), -1) - self.assertAlmostEqual(complex("+1"), +1) - self.assertAlmostEqual(complex("(1+2j)"), 1+2j) - self.assertAlmostEqual(complex("(1.3+2.2j)"), 1.3+2.2j) - self.assertAlmostEqual(complex("3.14+1J"), 3.14+1j) - self.assertAlmostEqual(complex(" ( +3.14-6J )"), 3.14-6j) - self.assertAlmostEqual(complex(" ( +3.14-J )"), 3.14-1j) - self.assertAlmostEqual(complex(" ( +3.14+j )"), 3.14+1j) - self.assertAlmostEqual(complex("J"), 1j) - self.assertAlmostEqual(complex("( j )"), 1j) - self.assertAlmostEqual(complex("+J"), 1j) - self.assertAlmostEqual(complex("( -j)"), -1j) - self.assertAlmostEqual(complex('1e-500'), 0.0 + 0.0j) - self.assertAlmostEqual(complex('-1e-500j'), 0.0 - 0.0j) - self.assertAlmostEqual(complex('-1e-500+1e-500j'), -0.0 + 0.0j) - self.assertEqual(complex('1-1j'), 1.0 - 1j) - self.assertEqual(complex('1J'), 1j) - - class complex2(complex): pass - self.assertAlmostEqual(complex(complex2(1+1j)), 1+1j) - self.assertAlmostEqual(complex(real=17, imag=23), 17+23j) - self.assertAlmostEqual(complex(real=17+23j), 17+23j) - self.assertAlmostEqual(complex(real=17+23j, imag=23), 17+46j) - self.assertAlmostEqual(complex(real=1+2j, imag=3+4j), -3+5j) + def check(z, x, y): + self.assertIs(type(z), complex) + self.assertFloatsAreIdentical(z.real, x) + self.assertFloatsAreIdentical(z.imag, y) + + check(complex(), 0.0, 0.0) + check(complex(10), 10.0, 0.0) + check(complex(4.25), 4.25, 0.0) + check(complex(4.25+0j), 4.25, 0.0) + check(complex(4.25+0.5j), 4.25, 0.5) + check(complex(ComplexSubclass(4.25+0.5j)), 4.25, 0.5) + check(complex(WithComplex(4.25+0.5j)), 4.25, 0.5) + + check(complex(1, 10), 1.0, 10.0) + check(complex(1, 10.0), 1.0, 10.0) + check(complex(1, 4.25), 1.0, 4.25) + check(complex(1.0, 10), 1.0, 10.0) + check(complex(4.25, 10), 4.25, 10.0) + check(complex(1.0, 10.0), 1.0, 10.0) + 
check(complex(4.25, 0.5), 4.25, 0.5) + + check(complex(4.25+0j, 0), 4.25, 0.0) + check(complex(ComplexSubclass(4.25+0j), 0), 4.25, 0.0) + check(complex(WithComplex(4.25+0j), 0), 4.25, 0.0) + check(complex(4.25j, 0), 0.0, 4.25) + check(complex(0j, 4.25), 0.0, 4.25) + check(complex(0, 4.25+0j), 0.0, 4.25) + check(complex(0, ComplexSubclass(4.25+0j)), 0.0, 4.25) + with self.assertRaisesRegex(TypeError, + "second argument must be a number, not 'WithComplex'"): + complex(0, WithComplex(4.25+0j)) + check(complex(0.0, 4.25j), -4.25, 0.0) + check(complex(4.25+0j, 0j), 4.25, 0.0) + check(complex(4.25j, 0j), 0.0, 4.25) + check(complex(0j, 4.25+0j), 0.0, 4.25) + check(complex(0j, 4.25j), -4.25, 0.0) + + check(complex(real=4.25), 4.25, 0.0) + check(complex(real=4.25+0j), 4.25, 0.0) + check(complex(real=4.25+1.5j), 4.25, 1.5) + check(complex(imag=1.5), 0.0, 1.5) + check(complex(real=4.25, imag=1.5), 4.25, 1.5) + check(complex(4.25, imag=1.5), 4.25, 1.5) # check that the sign of a zero in the real or imaginary part - # is preserved when constructing from two floats. (These checks - # are harmless on systems without support for signed zeros.) - def split_zeros(x): - """Function that produces different results for 0. and -0.""" - return atan2(x, -1.) - - self.assertEqual(split_zeros(complex(1., 0.).imag), split_zeros(0.)) - self.assertEqual(split_zeros(complex(1., -0.).imag), split_zeros(-0.)) - self.assertEqual(split_zeros(complex(0., 1.).real), split_zeros(0.)) - self.assertEqual(split_zeros(complex(-0., 1.).real), split_zeros(-0.)) - - c = 3.14 + 1j - self.assertTrue(complex(c) is c) - del c - - self.assertRaises(TypeError, complex, "1", "1") - self.assertRaises(TypeError, complex, 1, "1") - - # SF bug 543840: complex(string) accepts strings with \0 - # Fixed in 2.3. 
- self.assertRaises(ValueError, complex, '1+1j\0j') - - self.assertRaises(TypeError, int, 5+3j) - self.assertRaises(TypeError, int, 5+3j) - self.assertRaises(TypeError, float, 5+3j) - self.assertRaises(ValueError, complex, "") - self.assertRaises(TypeError, complex, None) - self.assertRaisesRegex(TypeError, "not 'NoneType'", complex, None) - self.assertRaises(ValueError, complex, "\0") - self.assertRaises(ValueError, complex, "3\09") - self.assertRaises(TypeError, complex, "1", "2") - self.assertRaises(TypeError, complex, "1", 42) - self.assertRaises(TypeError, complex, 1, "2") - self.assertRaises(ValueError, complex, "1+") - self.assertRaises(ValueError, complex, "1+1j+1j") - self.assertRaises(ValueError, complex, "--") - self.assertRaises(ValueError, complex, "(1+2j") - self.assertRaises(ValueError, complex, "1+2j)") - self.assertRaises(ValueError, complex, "1+(2j)") - self.assertRaises(ValueError, complex, "(1+2j)123") - self.assertRaises(ValueError, complex, "x") - self.assertRaises(ValueError, complex, "1j+2") - self.assertRaises(ValueError, complex, "1e1ej") - self.assertRaises(ValueError, complex, "1e++1ej") - self.assertRaises(ValueError, complex, ")1+2j(") - self.assertRaisesRegex( - TypeError, + # is preserved when constructing from two floats. 
+ for x in 1.0, -1.0: + for y in 0.0, -0.0: + check(complex(x, y), x, y) + check(complex(y, x), y, x) + + c = complex(4.25, 1.5) + self.assertIs(complex(c), c) + c2 = ComplexSubclass(c) + self.assertEqual(c2, c) + self.assertIs(type(c2), ComplexSubclass) + del c, c2 + + self.assertRaisesRegex(TypeError, "first argument must be a string or a number, not 'dict'", - complex, {1:2}, 1) - self.assertRaisesRegex( - TypeError, + complex, {}) + self.assertRaisesRegex(TypeError, + "first argument must be a string or a number, not 'NoneType'", + complex, None) + self.assertRaisesRegex(TypeError, + "first argument must be a string or a number, not 'dict'", + complex, {1:2}, 0) + self.assertRaisesRegex(TypeError, + "can't take second arg if first is a string", + complex, '1', 0) + self.assertRaisesRegex(TypeError, "second argument must be a number, not 'dict'", - complex, 1, {1:2}) - # the following three are accepted by Python 2.6 - self.assertRaises(ValueError, complex, "1..1j") - self.assertRaises(ValueError, complex, "1.11.1j") - self.assertRaises(ValueError, complex, "1e1.1j") - - # check that complex accepts long unicode strings - self.assertEqual(type(complex("1"*500)), complex) - # check whitespace processing - self.assertEqual(complex('\N{EM SPACE}(\N{EN SPACE}1+1j ) '), 1+1j) - # Invalid unicode string - # See bpo-34087 - self.assertRaises(ValueError, complex, '\u3053\u3093\u306b\u3061\u306f') + complex, 0, {1:2}) + self.assertRaisesRegex(TypeError, + "second arg can't be a string", + complex, 0, '1') + + self.assertRaises(TypeError, complex, WithComplex(1.5)) + self.assertRaises(TypeError, complex, WithComplex(1)) + self.assertRaises(TypeError, complex, WithComplex(None)) + self.assertRaises(TypeError, complex, WithComplex(4.25+0j), object()) + self.assertRaises(TypeError, complex, WithComplex(1.5), object()) + self.assertRaises(TypeError, complex, WithComplex(1), object()) + self.assertRaises(TypeError, complex, WithComplex(None), object()) class 
EvilExc(Exception): pass @@ -482,33 +444,33 @@ def __complex__(self): self.assertRaises(EvilExc, complex, evilcomplex()) - class float2: - def __init__(self, value): - self.value = value - def __float__(self): - return self.value - - self.assertAlmostEqual(complex(float2(42.)), 42) - self.assertAlmostEqual(complex(real=float2(17.), imag=float2(23.)), 17+23j) - self.assertRaises(TypeError, complex, float2(None)) - - class MyIndex: - def __init__(self, value): - self.value = value - def __index__(self): - return self.value - - self.assertAlmostEqual(complex(MyIndex(42)), 42.0+0.0j) - self.assertAlmostEqual(complex(123, MyIndex(42)), 123.0+42.0j) - self.assertRaises(OverflowError, complex, MyIndex(2**2000)) - self.assertRaises(OverflowError, complex, 123, MyIndex(2**2000)) + check(complex(WithFloat(4.25)), 4.25, 0.0) + check(complex(WithFloat(4.25), 1.5), 4.25, 1.5) + check(complex(1.5, WithFloat(4.25)), 1.5, 4.25) + self.assertRaises(TypeError, complex, WithFloat(42)) + self.assertRaises(TypeError, complex, WithFloat(42), 1.5) + self.assertRaises(TypeError, complex, 1.5, WithFloat(42)) + self.assertRaises(TypeError, complex, WithFloat(None)) + self.assertRaises(TypeError, complex, WithFloat(None), 1.5) + self.assertRaises(TypeError, complex, 1.5, WithFloat(None)) + + check(complex(WithIndex(42)), 42.0, 0.0) + check(complex(WithIndex(42), 1.5), 42.0, 1.5) + check(complex(1.5, WithIndex(42)), 1.5, 42.0) + self.assertRaises(OverflowError, complex, WithIndex(2**2000)) + self.assertRaises(OverflowError, complex, WithIndex(2**2000), 1.5) + self.assertRaises(OverflowError, complex, 1.5, WithIndex(2**2000)) + self.assertRaises(TypeError, complex, WithIndex(None)) + self.assertRaises(TypeError, complex, WithIndex(None), 1.5) + self.assertRaises(TypeError, complex, 1.5, WithIndex(None)) class MyInt: def __int__(self): return 42 self.assertRaises(TypeError, complex, MyInt()) - self.assertRaises(TypeError, complex, 123, MyInt()) + self.assertRaises(TypeError, complex, MyInt(), 
1.5) + self.assertRaises(TypeError, complex, 1.5, MyInt()) class complex0(complex): """Test usage of __complex__() when inheriting from 'complex'""" @@ -528,9 +490,9 @@ class complex2(complex): def __complex__(self): return None - self.assertEqual(complex(complex0(1j)), 42j) + check(complex(complex0(1j)), 0.0, 42.0) with self.assertWarns(DeprecationWarning): - self.assertEqual(complex(complex1(1j)), 2j) + check(complex(complex1(1j)), 0.0, 2.0) self.assertRaises(TypeError, complex, complex2(1j)) def test___complex__(self): @@ -538,36 +500,93 @@ def test___complex__(self): self.assertEqual(z.__complex__(), z) self.assertEqual(type(z.__complex__()), complex) - class complex_subclass(complex): - pass - - z = complex_subclass(3 + 4j) + z = ComplexSubclass(3 + 4j) self.assertEqual(z.__complex__(), 3 + 4j) self.assertEqual(type(z.__complex__()), complex) @support.requires_IEEE_754 def test_constructor_special_numbers(self): - class complex2(complex): - pass for x in 0.0, -0.0, INF, -INF, NAN: for y in 0.0, -0.0, INF, -INF, NAN: with self.subTest(x=x, y=y): z = complex(x, y) self.assertFloatsAreIdentical(z.real, x) self.assertFloatsAreIdentical(z.imag, y) - z = complex2(x, y) - self.assertIs(type(z), complex2) + z = ComplexSubclass(x, y) + self.assertIs(type(z), ComplexSubclass) self.assertFloatsAreIdentical(z.real, x) self.assertFloatsAreIdentical(z.imag, y) - z = complex(complex2(x, y)) + z = complex(ComplexSubclass(x, y)) self.assertIs(type(z), complex) self.assertFloatsAreIdentical(z.real, x) self.assertFloatsAreIdentical(z.imag, y) - z = complex2(complex(x, y)) - self.assertIs(type(z), complex2) + z = ComplexSubclass(complex(x, y)) + self.assertIs(type(z), ComplexSubclass) self.assertFloatsAreIdentical(z.real, x) self.assertFloatsAreIdentical(z.imag, y) + def test_constructor_from_string(self): + def check(z, x, y): + self.assertIs(type(z), complex) + self.assertFloatsAreIdentical(z.real, x) + self.assertFloatsAreIdentical(z.imag, y) + + check(complex("1"), 1.0, 0.0) 
+ check(complex("1j"), 0.0, 1.0) + check(complex("-1"), -1.0, 0.0) + check(complex("+1"), 1.0, 0.0) + check(complex("1+2j"), 1.0, 2.0) + check(complex("(1+2j)"), 1.0, 2.0) + check(complex("(1.5+4.25j)"), 1.5, 4.25) + check(complex("4.25+1J"), 4.25, 1.0) + check(complex(" ( +4.25-6J )"), 4.25, -6.0) + check(complex(" ( +4.25-J )"), 4.25, -1.0) + check(complex(" ( +4.25+j )"), 4.25, 1.0) + check(complex("J"), 0.0, 1.0) + check(complex("( j )"), 0.0, 1.0) + check(complex("+J"), 0.0, 1.0) + check(complex("( -j)"), 0.0, -1.0) + check(complex('1-1j'), 1.0, -1.0) + check(complex('1J'), 0.0, 1.0) + + check(complex('1e-500'), 0.0, 0.0) + check(complex('-1e-500j'), 0.0, -0.0) + check(complex('1e-500+1e-500j'), 0.0, 0.0) + check(complex('-1e-500+1e-500j'), -0.0, 0.0) + check(complex('1e-500-1e-500j'), 0.0, -0.0) + check(complex('-1e-500-1e-500j'), -0.0, -0.0) + + # SF bug 543840: complex(string) accepts strings with \0 + # Fixed in 2.3. + self.assertRaises(ValueError, complex, '1+1j\0j') + self.assertRaises(ValueError, complex, "") + self.assertRaises(ValueError, complex, "\0") + self.assertRaises(ValueError, complex, "3\09") + self.assertRaises(ValueError, complex, "1+") + self.assertRaises(ValueError, complex, "1+1j+1j") + self.assertRaises(ValueError, complex, "--") + self.assertRaises(ValueError, complex, "(1+2j") + self.assertRaises(ValueError, complex, "1+2j)") + self.assertRaises(ValueError, complex, "1+(2j)") + self.assertRaises(ValueError, complex, "(1+2j)123") + self.assertRaises(ValueError, complex, "x") + self.assertRaises(ValueError, complex, "1j+2") + self.assertRaises(ValueError, complex, "1e1ej") + self.assertRaises(ValueError, complex, "1e++1ej") + self.assertRaises(ValueError, complex, ")1+2j(") + # the following three are accepted by Python 2.6 + self.assertRaises(ValueError, complex, "1..1j") + self.assertRaises(ValueError, complex, "1.11.1j") + self.assertRaises(ValueError, complex, "1e1.1j") + + # check that complex accepts long unicode strings + 
self.assertIs(type(complex("1"*500)), complex) + # check whitespace processing + self.assertEqual(complex('\N{EM SPACE}(\N{EN SPACE}1+1j ) '), 1+1j) + # Invalid unicode string + # See bpo-34087 + self.assertRaises(ValueError, complex, '\u3053\u3093\u306b\u3061\u306f') + def test_constructor_negative_nans_from_string(self): self.assertEqual(copysign(1., complex("-nan").real), -1.) self.assertEqual(copysign(1., complex("-nanj").imag), -1.) @@ -589,7 +608,7 @@ def test_underscores(self): def test_hash(self): for x in range(-30, 30): self.assertEqual(hash(x), hash(complex(x, 0))) - x /= 3.0 # now check against floating point + x /= 3.0 # now check against floating-point self.assertEqual(hash(x), hash(complex(x, 0.))) self.assertNotEqual(hash(2000005 - 1j), -1) @@ -599,6 +618,8 @@ def test_abs(self): for num in nums: self.assertAlmostEqual((num.real**2 + num.imag**2) ** 0.5, abs(num)) + self.assertRaises(OverflowError, abs, complex(DBL_MAX, DBL_MAX)) + def test_repr_str(self): def test(v, expected, test_fn=self.assertEqual): test_fn(repr(v), expected) @@ -644,9 +665,6 @@ def test(v, expected, test_fn=self.assertEqual): test(complex(-0., -0.), "(-0-0j)") def test_pos(self): - class ComplexSubclass(complex): - pass - self.assertEqual(+(1+6j), 1+6j) self.assertEqual(+ComplexSubclass(1, 6), 1+6j) self.assertIs(type(+ComplexSubclass(1, 6)), complex) @@ -666,8 +684,8 @@ def test_getnewargs(self): def test_plus_minus_0j(self): # test that -0j and 0j literals are not identified z1, z2 = 0j, -0j - self.assertEqual(atan2(z1.imag, -1.), atan2(0., -1.)) - self.assertEqual(atan2(z2.imag, -1.), atan2(-0., -1.)) + self.assertFloatsAreIdentical(z1.imag, 0.0) + self.assertFloatsAreIdentical(z2.imag, -0.0) @support.requires_IEEE_754 def test_negated_imaginary_literal(self): @@ -702,8 +720,7 @@ def test_repr_roundtrip(self): for y in vals: z = complex(x, y) roundtrip = complex(repr(z)) - self.assertFloatsAreIdentical(z.real, roundtrip.real) - self.assertFloatsAreIdentical(z.imag, 
roundtrip.imag) + self.assertComplexesAreIdentical(z, roundtrip) # if we predefine some constants, then eval(repr(z)) should # also work, except that it might change the sign of zeros diff --git a/Lib/test/test_float.py b/Lib/test/test_float.py index d94a2bdadd..2ccad19e03 100644 --- a/Lib/test/test_float.py +++ b/Lib/test/test_float.py @@ -9,8 +9,10 @@ from test import support from test.support.testcase import FloatsAreIdenticalMixin -from test.test_grammar import (VALID_UNDERSCORE_LITERALS, - INVALID_UNDERSCORE_LITERALS) +from test.support.numbers import ( + VALID_UNDERSCORE_LITERALS, + INVALID_UNDERSCORE_LITERALS, +) from math import isinf, isnan, copysign, ldexp import math @@ -1513,4 +1515,4 @@ def __init__(self, value): if __name__ == '__main__': - unittest.main() \ No newline at end of file + unittest.main() From d42e8f00425630fcd0effcc74d89a119c197cdaa Mon Sep 17 00:00:00 2001 From: Jiseok CHOI Date: Mon, 14 Jul 2025 14:21:36 +0900 Subject: [PATCH 034/176] fix(sqlite): produce correct error for surrogate characters (#5962) --- Lib/test/test_sqlite3/test_userfunctions.py | 2 -- stdlib/src/sqlite.rs | 8 +++----- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/Lib/test/test_sqlite3/test_userfunctions.py b/Lib/test/test_sqlite3/test_userfunctions.py index 7b092365d4..e8b98a66a5 100644 --- a/Lib/test/test_sqlite3/test_userfunctions.py +++ b/Lib/test/test_sqlite3/test_userfunctions.py @@ -354,8 +354,6 @@ def test_return_non_contiguous_blob(self): cur = self.con.execute("select return_noncont_blob()") cur.fetchone() - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_param_surrogates(self): self.assertRaisesRegex(UnicodeEncodeError, "surrogates not allowed", self.con.execute, "select spam(?)", diff --git a/stdlib/src/sqlite.rs b/stdlib/src/sqlite.rs index cec1f04ed9..073975f8fe 100644 --- a/stdlib/src/sqlite.rs +++ b/stdlib/src/sqlite.rs @@ -2965,12 +2965,10 @@ mod _sqlite { } fn str_to_ptr_len(s: &PyStr, vm: &VirtualMachine) -> 
PyResult<(*const libc::c_char, i32)> { - let s = s - .to_str() - .ok_or_else(|| vm.new_unicode_encode_error("surrogates not allowed"))?; - let len = c_int::try_from(s.len()) + let s_str = s.try_to_str(vm)?; + let len = c_int::try_from(s_str.len()) .map_err(|_| vm.new_overflow_error("TEXT longer than INT_MAX bytes"))?; - let ptr = s.as_ptr().cast(); + let ptr = s_str.as_ptr().cast(); Ok((ptr, len)) } From 97e85b220e48a71cb1ef1c7b05b48cd70d2327b4 Mon Sep 17 00:00:00 2001 From: Shahar Naveh <50263213+ShaharNaveh@users.noreply.github.com> Date: Mon, 14 Jul 2025 08:21:59 +0300 Subject: [PATCH 035/176] Update `test_{dict,weakref}.py` from 3.13.5 (#5963) * Update test_dict.py from 3.13.5 * Update `test_weakref.py` from 3.13.5 --- Lib/test/support/__init__.py | 16 +++ Lib/test/test_dict.py | 64 +++++++++-- Lib/test/test_weakref.py | 209 ++++++++++++++++++++++++++--------- 3 files changed, 226 insertions(+), 63 deletions(-) diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index 948bad1ca8..6c7e799798 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -2589,6 +2589,22 @@ def adjust_int_max_str_digits(max_digits): finally: sys.set_int_max_str_digits(current) + +# From CPython 3.13.5 +def get_c_recursion_limit(): + try: + import _testcapi + return _testcapi.Py_C_RECURSION_LIMIT + except ImportError: + raise unittest.SkipTest('requires _testcapi') + + +# From CPython 3.13.5 +def exceeds_recursion_limit(): + """For recursion tests, easily exceeds default recursion limit.""" + return get_c_recursion_limit() * 3 + + #For recursion tests, easily exceeds default recursion limit EXCEEDS_RECURSION_LIMIT = 5000 diff --git a/Lib/test/test_dict.py b/Lib/test/test_dict.py index 4aa6f1089a..9598a7ab96 100644 --- a/Lib/test/test_dict.py +++ b/Lib/test/test_dict.py @@ -8,7 +8,7 @@ import unittest import weakref from test import support -from test.support import import_helper, C_RECURSION_LIMIT +from test.support import import_helper, 
get_c_recursion_limit class DictTest(unittest.TestCase): @@ -312,17 +312,34 @@ def __setitem__(self, key, value): self.assertRaises(Exc, baddict2.fromkeys, [1]) # test fast path for dictionary inputs + res = dict(zip(range(6), [0]*6)) d = dict(zip(range(6), range(6))) - self.assertEqual(dict.fromkeys(d, 0), dict(zip(range(6), [0]*6))) - + self.assertEqual(dict.fromkeys(d, 0), res) + # test fast path for set inputs + d = set(range(6)) + self.assertEqual(dict.fromkeys(d, 0), res) + # test slow path for other iterable inputs + d = list(range(6)) + self.assertEqual(dict.fromkeys(d, 0), res) + + # test fast path when object's constructor returns large non-empty dict class baddict3(dict): def __new__(cls): return d - d = {i : i for i in range(10)} + d = {i : i for i in range(1000)} res = d.copy() res.update(a=None, b=None, c=None) self.assertEqual(baddict3.fromkeys({"a", "b", "c"}), res) + # test slow path when object is a proper subclass of dict + class baddict4(dict): + def __init__(self): + dict.__init__(self, d) + d = {i : i for i in range(1000)} + res = d.copy() + res.update(a=None, b=None, c=None) + self.assertEqual(baddict4.fromkeys({"a", "b", "c"}), res) + def test_copy(self): d = {1: 1, 2: 2, 3: 3} self.assertIsNot(d.copy(), d) @@ -596,10 +613,9 @@ def __repr__(self): d = {1: BadRepr()} self.assertRaises(Exc, repr, d) - @unittest.skipIf(sys.platform == 'win32', 'TODO: RUSTPYTHON Windows') def test_repr_deep(self): d = {} - for i in range(C_RECURSION_LIMIT + 1): + for i in range(get_c_recursion_limit() + 1): d = {1: d} self.assertRaises(RecursionError, repr, d) @@ -994,6 +1010,18 @@ class MyDict(dict): pass self._tracked(MyDict()) + @support.cpython_only + def test_track_lazy_instance_dicts(self): + class C: + pass + o = C() + d = o.__dict__ + self._not_tracked(d) + o.untracked = 42 + self._not_tracked(d) + o.tracked = [] + self._tracked(d) + def make_shared_key_dict(self, n): class C: pass @@ -1108,10 +1136,8 @@ class C: a = C() a.x = 1 d = a.__dict__ - 
before_resize = sys.getsizeof(d) d[2] = 2 # split table is resized to a generic combined table - self.assertGreater(sys.getsizeof(d), before_resize) self.assertEqual(list(d), ['x', 2]) def test_iterator_pickling(self): @@ -1485,6 +1511,24 @@ def test_dict_items_result_gc_reversed(self): gc.collect() self.assertTrue(gc.is_tracked(next(it))) + def test_store_evilattr(self): + class EvilAttr: + def __init__(self, d): + self.d = d + + def __del__(self): + if 'attr' in self.d: + del self.d['attr'] + gc.collect() + + class Obj: + pass + + obj = Obj() + obj.__dict__ = {} + for _ in range(10): + obj.attr = EvilAttr(obj.__dict__) + def test_str_nonstr(self): # cpython uses a different lookup function if the dict only contains # `str` keys. Make sure the unoptimized path is used when a non-`str` @@ -1591,8 +1635,8 @@ class CAPITest(unittest.TestCase): # Test _PyDict_GetItem_KnownHash() @support.cpython_only def test_getitem_knownhash(self): - _testcapi = import_helper.import_module('_testcapi') - dict_getitem_knownhash = _testcapi.dict_getitem_knownhash + _testinternalcapi = import_helper.import_module('_testinternalcapi') + dict_getitem_knownhash = _testinternalcapi.dict_getitem_knownhash d = {'x': 1, 'y': 2, 'z': 3} self.assertEqual(dict_getitem_knownhash(d, 'x', hash('x')), 1) diff --git a/Lib/test/test_weakref.py b/Lib/test/test_weakref.py index 7d204f3c4c..242c076f9b 100644 --- a/Lib/test/test_weakref.py +++ b/Lib/test/test_weakref.py @@ -1,5 +1,6 @@ import gc import sys +import doctest import unittest import collections import weakref @@ -9,10 +10,14 @@ import threading import time import random +import textwrap from test import support -from test.support import script_helper, ALWAYS_EQ +from test.support import script_helper, ALWAYS_EQ, suppress_immortalization from test.support import gc_collect +from test.support import import_helper +from test.support import threading_helper +from test.support import is_wasi, Py_DEBUG # Used in 
ReferencesTestCase.test_ref_created_during_del() . ref_from_del = None @@ -77,7 +82,7 @@ def callback(self, ref): @contextlib.contextmanager -def collect_in_thread(period=0.0001): +def collect_in_thread(period=0.005): """ Ensure GC collections happen in a different thread, at a high frequency. """ @@ -114,6 +119,49 @@ def test_basic_ref(self): del o repr(wr) + @support.cpython_only + def test_ref_repr(self): + obj = C() + ref = weakref.ref(obj) + regex = ( + rf"" + ) + self.assertRegex(repr(ref), regex) + + obj = None + gc_collect() + self.assertRegex(repr(ref), + rf'') + + # test type with __name__ + class WithName: + @property + def __name__(self): + return "custom_name" + + obj2 = WithName() + ref2 = weakref.ref(obj2) + regex = ( + rf"" + ) + self.assertRegex(repr(ref2), regex) + + def test_repr_failure_gh99184(self): + class MyConfig(dict): + def __getattr__(self, x): + return self[x] + + obj = MyConfig(offset=5) + obj_weakref = weakref.ref(obj) + + self.assertIn('MyConfig', repr(obj_weakref)) + self.assertIn('MyConfig', str(obj_weakref)) + def test_basic_callback(self): self.check_basic_callback(C) self.check_basic_callback(create_function) @@ -121,7 +169,7 @@ def test_basic_callback(self): @support.cpython_only def test_cfunction(self): - import _testcapi + _testcapi = import_helper.import_module("_testcapi") create_cfunction = _testcapi.create_cfunction f = create_cfunction() wr = weakref.ref(f) @@ -182,6 +230,22 @@ def check(proxy): self.assertRaises(ReferenceError, bool, ref3) self.assertEqual(self.cbcalled, 2) + @support.cpython_only + def test_proxy_repr(self): + obj = C() + ref = weakref.proxy(obj, self.callback) + regex = ( + rf"" + ) + self.assertRegex(repr(ref), regex) + + obj = None + gc_collect() + self.assertRegex(repr(ref), + rf'') + def check_basic_ref(self, factory): o = factory() ref = weakref.ref(o) @@ -613,7 +677,8 @@ class C(object): # deallocation of c2. 
del c2 - def test_callback_in_cycle_1(self): + @suppress_immortalization() + def test_callback_in_cycle(self): import gc class J(object): @@ -653,40 +718,11 @@ def acallback(self, ignore): del I, J, II gc.collect() - def test_callback_in_cycle_2(self): + def test_callback_reachable_one_way(self): import gc - # This is just like test_callback_in_cycle_1, except that II is an - # old-style class. The symptom is different then: an instance of an - # old-style class looks in its own __dict__ first. 'J' happens to - # get cleared from I.__dict__ before 'wr', and 'J' was never in II's - # __dict__, so the attribute isn't found. The difference is that - # the old-style II doesn't have a NULL __mro__ (it doesn't have any - # __mro__), so no segfault occurs. Instead it got: - # test_callback_in_cycle_2 (__main__.ReferencesTestCase) ... - # Exception exceptions.AttributeError: - # "II instance has no attribute 'J'" in > ignored - - class J(object): - pass - - class II: - def acallback(self, ignore): - self.J - - I = II() - I.J = J - I.wr = weakref.ref(J, I.acallback) - - del I, J, II - gc.collect() - - def test_callback_in_cycle_3(self): - import gc - - # This one broke the first patch that fixed the last two. In this - # case, the objects reachable from the callback aren't also reachable + # This one broke the first patch that fixed the previous test. In this case, + # the objects reachable from the callback aren't also reachable # from the object (c1) *triggering* the callback: you can get to # c1 from c2, but not vice-versa. The result was that c2's __dict__ # got tp_clear'ed by the time the c2.cb callback got invoked. @@ -706,10 +742,10 @@ def cb(self, ignore): del c1, c2 gc.collect() - def test_callback_in_cycle_4(self): + def test_callback_different_classes(self): import gc - # Like test_callback_in_cycle_3, except c2 and c1 have different + # Like test_callback_reachable_one_way, except c2 and c1 have different # classes. 
c2's class (C) isn't reachable from c1 then, so protecting # objects reachable from the dying object (c1) isn't enough to stop # c2's class (C) from getting tp_clear'ed before c2.cb is invoked. @@ -736,6 +772,7 @@ class D: # TODO: RUSTPYTHON @unittest.expectedFailure + @suppress_immortalization() def test_callback_in_cycle_resurrection(self): import gc @@ -879,6 +916,7 @@ def test_init(self): # No exception should be raised here gc.collect() + @suppress_immortalization() def test_classes(self): # Check that classes are weakrefable. class A(object): @@ -958,6 +996,7 @@ def test_hashing(self): self.assertEqual(hash(a), hash(42)) self.assertRaises(TypeError, hash, b) + @unittest.skipIf(is_wasi and Py_DEBUG, "requires deep stack") def test_trashcan_16602(self): # Issue #16602: when a weakref's target was part of a long # deallocation chain, the trashcan mechanism could delay clearing @@ -1015,6 +1054,31 @@ def __del__(self): pass del x support.gc_collect() + @support.cpython_only + def test_no_memory_when_clearing(self): + # gh-118331: Make sure we do not raise an exception from the destructor + # when clearing weakrefs if allocating the intermediate tuple fails. + code = textwrap.dedent(""" + import _testcapi + import weakref + + class TestObj: + pass + + def callback(obj): + pass + + obj = TestObj() + # The choice of 50 is arbitrary, but must be large enough to ensure + # the allocation won't be serviced by the free list. 
+ wrs = [weakref.ref(obj, callback) for _ in range(50)] + _testcapi.set_nomemory(0) + del obj + """).strip() + res, _ = script_helper.run_python_until_end("-c", code) + stderr = res.err.decode("ascii", "backslashreplace") + self.assertNotRegex(stderr, "_Py_Dealloc: Deallocator of type 'TestObj'") + class SubclassableWeakrefTestCase(TestBase): @@ -1267,6 +1331,12 @@ class MappingTestCase(TestBase): COUNT = 10 + if support.check_sanitizer(thread=True) and support.Py_GIL_DISABLED: + # Reduce iteration count to get acceptable latency + NUM_THREADED_ITERATIONS = 1000 + else: + NUM_THREADED_ITERATIONS = 100000 + def check_len_cycles(self, dict_type, cons): N = 20 items = [RefCycle() for i in range(N)] @@ -1898,34 +1968,56 @@ def test_make_weak_keyed_dict_repr(self): dict = weakref.WeakKeyDictionary() self.assertRegex(repr(dict), '') + @threading_helper.requires_working_threading() def test_threaded_weak_valued_setdefault(self): d = weakref.WeakValueDictionary() with collect_in_thread(): - for i in range(100000): + for i in range(self.NUM_THREADED_ITERATIONS): x = d.setdefault(10, RefCycle()) self.assertIsNot(x, None) # we never put None in there! del x + @threading_helper.requires_working_threading() def test_threaded_weak_valued_pop(self): d = weakref.WeakValueDictionary() with collect_in_thread(): - for i in range(100000): + for i in range(self.NUM_THREADED_ITERATIONS): d[10] = RefCycle() x = d.pop(10, 10) self.assertIsNot(x, None) # we never put None in there! + @threading_helper.requires_working_threading() def test_threaded_weak_valued_consistency(self): # Issue #28427: old keys should not remove new values from # WeakValueDictionary when collecting from another thread. 
d = weakref.WeakValueDictionary() with collect_in_thread(): - for i in range(200000): + for i in range(2 * self.NUM_THREADED_ITERATIONS): o = RefCycle() d[10] = o # o is still alive, so the dict can't be empty self.assertEqual(len(d), 1) o = None # lose ref + @support.cpython_only + def test_weak_valued_consistency(self): + # A single-threaded, deterministic repro for issue #28427: old keys + # should not remove new values from WeakValueDictionary. This relies on + # an implementation detail of CPython's WeakValueDictionary (its + # underlying dictionary of KeyedRefs) to reproduce the issue. + d = weakref.WeakValueDictionary() + with support.disable_gc(): + d[10] = RefCycle() + # Keep the KeyedRef alive after it's replaced so that GC will invoke + # the callback. + wr = d.data[10] + # Replace the value with something that isn't cyclic garbage + o = RefCycle() + d[10] = o + # Trigger GC, which will invoke the callback for `wr` + gc.collect() + self.assertEqual(len(d), 1) + def check_threaded_weak_dict_copy(self, type_, deepcopy): # `type_` should be either WeakKeyDictionary or WeakValueDictionary. # `deepcopy` should be either True or False. @@ -1987,22 +2079,28 @@ def pop_and_collect(lst): if exc: raise exc[0] + @threading_helper.requires_working_threading() def test_threaded_weak_key_dict_copy(self): # Issue #35615: Weakref keys or values getting GC'ed during dict # copying should not result in a crash. self.check_threaded_weak_dict_copy(weakref.WeakKeyDictionary, False) + @threading_helper.requires_working_threading() + @support.requires_resource('cpu') def test_threaded_weak_key_dict_deepcopy(self): # Issue #35615: Weakref keys or values getting GC'ed during dict # copying should not result in a crash. 
self.check_threaded_weak_dict_copy(weakref.WeakKeyDictionary, True) @unittest.skip("TODO: RUSTPYTHON; occasionally crash (Exit code -6)") + @threading_helper.requires_working_threading() def test_threaded_weak_value_dict_copy(self): # Issue #35615: Weakref keys or values getting GC'ed during dict # copying should not result in a crash. self.check_threaded_weak_dict_copy(weakref.WeakValueDictionary, False) + @threading_helper.requires_working_threading() + @support.requires_resource('cpu') def test_threaded_weak_value_dict_deepcopy(self): # Issue #35615: Weakref keys or values getting GC'ed during dict # copying should not result in a crash. @@ -2195,6 +2293,19 @@ def test_atexit(self): self.assertTrue(b'ZeroDivisionError' in err) +class ModuleTestCase(unittest.TestCase): + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_names(self): + for name in ('ReferenceType', 'ProxyType', 'CallableProxyType', + 'WeakMethod', 'WeakSet', 'WeakKeyDictionary', 'WeakValueDictionary'): + obj = getattr(weakref, name) + if name != 'WeakSet': + self.assertEqual(obj.__module__, 'weakref') + self.assertEqual(obj.__name__, name) + self.assertEqual(obj.__qualname__, name) + + libreftest = """ Doctest for examples in the library reference: weakref.rst >>> from test.support import gc_collect @@ -2283,19 +2394,11 @@ def test_atexit(self): __test__ = {'libreftest' : libreftest} -def test_main(): - support.run_unittest( - ReferencesTestCase, - WeakMethodTestCase, - MappingTestCase, - WeakValueDictionaryTestCase, - WeakKeyDictionaryTestCase, - SubclassableWeakrefTestCase, - FinalizeTestCase, - ) +def load_tests(loader, tests, pattern): # TODO: RUSTPYTHON - # support.run_doctest(sys.modules[__name__]) + # tests.addTest(doctest.DocTestSuite()) + return tests if __name__ == "__main__": - test_main() + unittest.main() From 5ab64b7002a42b38ac29c365652ba1bc3441d232 Mon Sep 17 00:00:00 2001 From: Jiseok CHOI Date: Mon, 14 Jul 2025 14:22:52 +0900 Subject: [PATCH 036/176] fix(sqlite): align 
adaptation protocol with CPython (#5964) --- Lib/test/test_sqlite3/test_types.py | 4 ---- stdlib/src/sqlite.rs | 12 ++++++++++-- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/Lib/test/test_sqlite3/test_types.py b/Lib/test/test_sqlite3/test_types.py index 6cbf99d6ea..53df08e999 100644 --- a/Lib/test/test_sqlite3/test_types.py +++ b/Lib/test/test_sqlite3/test_types.py @@ -439,8 +439,6 @@ def test_missing_protocol(self): with self.assertRaises(sqlite.ProgrammingError): sqlite.adapt(1, None) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_defect_proto(self): class DefectProto(): def __adapt__(self): @@ -448,8 +446,6 @@ def __adapt__(self): with self.assertRaises(sqlite.ProgrammingError): sqlite.adapt(1., DefectProto) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_defect_self_adapt(self): class DefectSelfAdapt(float): def __conform__(self, _): diff --git a/stdlib/src/sqlite.rs b/stdlib/src/sqlite.rs index 073975f8fe..ce84ac2988 100644 --- a/stdlib/src/sqlite.rs +++ b/stdlib/src/sqlite.rs @@ -695,7 +695,11 @@ mod _sqlite { } if let Ok(adapter) = proto.get_attr("__adapt__", vm) { match adapter.call((obj,), vm) { - Ok(val) => return Ok(val), + Ok(val) => { + if !vm.is_none(&val) { + return Ok(val); + } + } Err(exc) => { if !exc.fast_isinstance(vm.ctx.exceptions.type_error) { return Err(exc); @@ -705,7 +709,11 @@ mod _sqlite { } if let Ok(adapter) = obj.get_attr("__conform__", vm) { match adapter.call((proto,), vm) { - Ok(val) => return Ok(val), + Ok(val) => { + if !vm.is_none(&val) { + return Ok(val); + } + } Err(exc) => { if !exc.fast_isinstance(vm.ctx.exceptions.type_error) { return Err(exc); From 4fe4ff4f998cccdb923852642485dd5cc41ef7cd Mon Sep 17 00:00:00 2001 From: Shahar Naveh <50263213+ShaharNaveh@users.noreply.github.com> Date: Mon, 14 Jul 2025 08:24:00 +0300 Subject: [PATCH 037/176] Update `test_{list,listcomps}.py` from 3.13.5 (#5965) --- Lib/test/test_list.py | 55 ++++++++++++++++++++++++++++++++++++-- 
Lib/test/test_listcomps.py | 13 ++++++--- 2 files changed, 63 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_list.py b/Lib/test/test_list.py index c82bf5067d..42d8dcbbe1 100644 --- a/Lib/test/test_list.py +++ b/Lib/test/test_list.py @@ -1,6 +1,8 @@ import sys +import textwrap from test import list_tests from test.support import cpython_only +from test.support.script_helper import assert_python_ok import pickle import unittest @@ -98,8 +100,13 @@ def imul(a, b): a *= b self.assertRaises((MemoryError, OverflowError), mul, lst, n) self.assertRaises((MemoryError, OverflowError), imul, lst, n) + def test_empty_slice(self): + x = [] + x[:] = x + self.assertEqual(x, []) + # TODO: RUSTPYTHON - @unittest.skip("Crashes on windows debug build") + @unittest.skip("TODO: RUSTPYTHON crash") def test_list_resize_overflow(self): # gh-97616: test new_allocated * sizeof(PyObject*) overflow # check in list_resize() @@ -113,13 +120,28 @@ def test_list_resize_overflow(self): with self.assertRaises((MemoryError, OverflowError)): lst *= size + # TODO: RUSTPYTHON + @unittest.skip("TODO: RUSTPYTHON hangs") + def test_repr_mutate(self): + class Obj: + @staticmethod + def __repr__(): + try: + mylist.pop() + except IndexError: + pass + return 'obj' + + mylist = [Obj() for _ in range(5)] + self.assertEqual(repr(mylist), '[obj, obj, obj]') + def test_repr_large(self): # Check the repr of large list objects def check(n): l = [0] * n s = repr(l) self.assertEqual(s, - '[' + ', '.join(['0'] * n) + ']') + '[' + ', '.join(['0'] * n) + ']') check(10) # check our checking code check(1000000) @@ -302,6 +324,35 @@ def __eq__(self, other): lst = [X(), X()] X() in lst + def test_tier2_invalidates_iterator(self): + # GH-121012 + for _ in range(100): + a = [1, 2, 3] + it = iter(a) + for _ in it: + pass + a.append(4) + self.assertEqual(list(it), []) + + def test_deopt_from_append_list(self): + # gh-132011: it used to crash, because + # of `CALL_LIST_APPEND` specialization failure. 
+ code = textwrap.dedent(""" + l = [] + def lappend(l, x, y): + l.append((x, y)) + for x in range(3): + lappend(l, None, None) + try: + lappend(list, None, None) + except TypeError: + pass + else: + raise AssertionError + """) + + rc, _, _ = assert_python_ok("-c", code) + self.assertEqual(rc, 0) if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_listcomps.py b/Lib/test/test_listcomps.py index ad1c5053a3..1380c08d28 100644 --- a/Lib/test/test_listcomps.py +++ b/Lib/test/test_listcomps.py @@ -177,7 +177,7 @@ def test_references___class___defined(self): res = [__class__ for x in [1]] """ self._check_in_scopes( - code, outputs={"res": [2]}, scopes=["module", "function"]) + code, outputs={"res": [2]}, scopes=["module", "function"]) self._check_in_scopes(code, raises=NameError, scopes=["class"]) def test_references___class___enclosing(self): @@ -648,11 +648,18 @@ def test_exception_in_post_comp_call(self): """ self._check_in_scopes(code, {"value": [1, None]}) + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_frame_locals(self): code = """ - val = [sys._getframe().f_locals for a in [0]][0]["a"] + val = "a" in [sys._getframe().f_locals for a in [0]][0] """ import sys + self._check_in_scopes(code, {"val": False}, ns={"sys": sys}) + + code = """ + val = [sys._getframe().f_locals["a"] for a in [0]][0] + """ self._check_in_scopes(code, {"val": 0}, ns={"sys": sys}) def _recursive_replace(self, maybe_code): @@ -736,7 +743,7 @@ def iter_raises(): for func, expected in [(init_raises, "BrokenIter(init_raises=True)"), (next_raises, "BrokenIter(next_raises=True)"), (iter_raises, "BrokenIter(iter_raises=True)"), - ]: + ]: with self.subTest(func): exc = func() f = traceback.extract_tb(exc.__traceback__)[0] From 36f4d30e0155314972425cf6211d90736aeb6b31 Mon Sep 17 00:00:00 2001 From: Shahar Naveh <50263213+ShaharNaveh@users.noreply.github.com> Date: Mon, 14 Jul 2025 08:26:08 +0300 Subject: [PATCH 038/176] Update `test_tuple.py` from 3.13.5 (#5966) --- 
Lib/test/test_tuple.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/Lib/test/test_tuple.py b/Lib/test/test_tuple.py index d2a2ed310b..153df0e52d 100644 --- a/Lib/test/test_tuple.py +++ b/Lib/test/test_tuple.py @@ -42,6 +42,35 @@ def test_keyword_args(self): with self.assertRaisesRegex(TypeError, 'keyword argument'): tuple(sequence=()) + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_keywords_in_subclass(self): + class subclass(tuple): + pass + u = subclass([1, 2]) + self.assertIs(type(u), subclass) + self.assertEqual(list(u), [1, 2]) + with self.assertRaises(TypeError): + subclass(sequence=()) + + class subclass_with_init(tuple): + def __init__(self, arg, newarg=None): + self.newarg = newarg + u = subclass_with_init([1, 2], newarg=3) + self.assertIs(type(u), subclass_with_init) + self.assertEqual(list(u), [1, 2]) + self.assertEqual(u.newarg, 3) + + class subclass_with_new(tuple): + def __new__(cls, arg, newarg=None): + self = super().__new__(cls, arg) + self.newarg = newarg + return self + u = subclass_with_new([1, 2], newarg=3) + self.assertIs(type(u), subclass_with_new) + self.assertEqual(list(u), [1, 2]) + self.assertEqual(u.newarg, 3) + def test_truth(self): super().test_truth() self.assertTrue(not ()) From 635b4afff1ca1ff4e488995e8a04f24cbbc9e19a Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Sun, 13 Jul 2025 12:56:59 +0900 Subject: [PATCH 039/176] Fix derive(Traverse) --- derive-impl/src/pytraverse.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/derive-impl/src/pytraverse.rs b/derive-impl/src/pytraverse.rs index 728722b83a..c5c4bbd270 100644 --- a/derive-impl/src/pytraverse.rs +++ b/derive-impl/src/pytraverse.rs @@ -105,8 +105,19 @@ pub(crate) fn impl_pytraverse(mut item: DeriveInput) -> Result { let ty = &item.ident; + // Add Traverse bound to all type parameters + for param in &mut item.generics.params { + if let syn::GenericParam::Type(type_param) = param { + type_param + 
.bounds + .push(syn::parse_quote!(::rustpython_vm::object::Traverse)); + } + } + + let (impl_generics, ty_generics, where_clause) = item.generics.split_for_impl(); + let ret = quote! { - unsafe impl ::rustpython_vm::object::Traverse for #ty { + unsafe impl #impl_generics ::rustpython_vm::object::Traverse for #ty #ty_generics #where_clause { fn traverse(&self, tracer_fn: &mut ::rustpython_vm::object::TraverseFn) { #trace_code } From 09489712e6d801722e9c1cb30a99701da15009ae Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Sun, 13 Jul 2025 14:34:46 +0900 Subject: [PATCH 040/176] PyPayload::payload_type_of --- vm/src/object/core.rs | 2 +- vm/src/object/payload.rs | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/vm/src/object/core.rs b/vm/src/object/core.rs index bb057f4906..b4b9557f2a 100644 --- a/vm/src/object/core.rs +++ b/vm/src/object/core.rs @@ -655,7 +655,7 @@ impl PyObject { #[inline(always)] pub fn payload_is(&self) -> bool { - self.0.typeid == TypeId::of::() + self.0.typeid == T::payload_type_id() } /// Force to return payload as T. diff --git a/vm/src/object/payload.rs b/vm/src/object/payload.rs index 6413d6ae06..f223af6e96 100644 --- a/vm/src/object/payload.rs +++ b/vm/src/object/payload.rs @@ -19,6 +19,10 @@ cfg_if::cfg_if! 
{ pub trait PyPayload: std::fmt::Debug + MaybeTraverse + PyThreadingConstraint + Sized + 'static { + #[inline] + fn payload_type_id() -> std::any::TypeId { + std::any::TypeId::of::() + } fn class(ctx: &Context) -> &'static Py; #[inline] @@ -75,7 +79,7 @@ pub trait PyPayload: } pub trait PyObjectPayload: - std::any::Any + std::fmt::Debug + MaybeTraverse + PyThreadingConstraint + 'static + PyPayload + std::any::Any + std::fmt::Debug + MaybeTraverse + PyThreadingConstraint + 'static { } From 14ce76e6c81b59191f14f811621ada0bba599cbb Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Sun, 13 Jul 2025 14:39:42 +0900 Subject: [PATCH 041/176] PyTupleTyped as alias of PyTuple --- vm/src/builtins/function.rs | 15 ++-- vm/src/builtins/tuple.rs | 120 ++++++++++++++++++------------- vm/src/builtins/type.rs | 22 +++--- vm/src/convert/try_from.rs | 8 +-- vm/src/frame.rs | 14 ++-- vm/src/object/core.rs | 44 +++++------- vm/src/object/traverse_object.rs | 5 +- vm/src/vm/context.rs | 7 ++ vm/src/vm/mod.rs | 8 +-- 9 files changed, 134 insertions(+), 109 deletions(-) diff --git a/vm/src/builtins/function.rs b/vm/src/builtins/function.rs index a29a077e50..45917adcf2 100644 --- a/vm/src/builtins/function.rs +++ b/vm/src/builtins/function.rs @@ -8,7 +8,6 @@ use super::{ #[cfg(feature = "jit")] use crate::common::lock::OnceCell; use crate::common::lock::PyMutex; -use crate::convert::{ToPyObject, TryFromObject}; use crate::function::ArgMapping; use crate::object::{Traverse, TraverseFn}; use crate::{ @@ -32,7 +31,7 @@ pub struct PyFunction { code: PyRef, globals: PyDictRef, builtins: PyObjectRef, - closure: Option>, + closure: Option>>, defaults_and_kwdefaults: PyMutex<(Option, Option)>, name: PyMutex, qualname: PyMutex, @@ -47,7 +46,9 @@ pub struct PyFunction { unsafe impl Traverse for PyFunction { fn traverse(&self, tracer_fn: &mut TraverseFn<'_>) { self.globals.traverse(tracer_fn); - self.closure.traverse(tracer_fn); + if let Some(closure) = self.closure.as_ref() { + 
closure.as_untyped().traverse(tracer_fn); + } self.defaults_and_kwdefaults.traverse(tracer_fn); } } @@ -58,7 +59,7 @@ impl PyFunction { pub(crate) fn new( code: PyRef, globals: PyDictRef, - closure: Option>, + closure: Option>>, defaults: Option, kw_only_defaults: Option, qualname: PyStrRef, @@ -326,6 +327,7 @@ impl Py { ) -> PyResult { #[cfg(feature = "jit")] if let Some(jitted_code) = self.jitted_code.get() { + use crate::convert::ToPyObject; match jit::get_jit_args(self, &func_args, jitted_code, vm) { Ok(args) => { return Ok(args.invoke().to_pyobject(vm)); @@ -427,7 +429,7 @@ impl PyFunction { #[pymember] fn __closure__(vm: &VirtualMachine, zelf: PyObjectRef) -> PyResult { let zelf = Self::_as_pyref(&zelf, vm)?; - Ok(vm.unwrap_or_none(zelf.closure.clone().map(|x| x.to_pyobject(vm)))) + Ok(vm.unwrap_or_none(zelf.closure.clone().map(|x| x.into()))) } #[pymember] @@ -612,8 +614,7 @@ impl Constructor for PyFunction { } // Validate that all items are cells and create typed tuple - let typed_closure = - PyTupleTyped::::try_from_object(vm, closure_tuple.into())?; + let typed_closure = closure_tuple.try_into_typed::(vm)?; Some(typed_closure) } else if !args.code.freevars.is_empty() { return Err(vm.new_type_error("arg 5 (closure) must be tuple")); diff --git a/vm/src/builtins/tuple.rs b/vm/src/builtins/tuple.rs index 2ee8497dda..9f589547f0 100644 --- a/vm/src/builtins/tuple.rs +++ b/vm/src/builtins/tuple.rs @@ -3,7 +3,7 @@ use crate::common::{ hash::{PyHash, PyUHash}, lock::PyMutex, }; -use crate::object::{Traverse, TraverseFn}; +use crate::object::{MaybeTraverse, Traverse, TraverseFn}; use crate::{ AsObject, Context, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, TryFromObject, atomic_func, @@ -449,6 +449,24 @@ impl Representable for PyTuple { } } +impl PyRef { + pub fn try_into_typed( + self, + vm: &VirtualMachine, + ) -> PyResult>>> { + PyRef::>>::try_from_untyped(self, vm) + } + /// # Safety + /// + /// The caller must ensure that all elements in the tuple 
are valid instances + /// of type `T` before calling this method. This is typically verified by + /// calling `try_into_typed` first. + unsafe fn into_typed_unchecked(self) -> PyRef>> { + let obj: PyObjectRef = self.into(); + unsafe { obj.downcast_unchecked::>>() } + } +} + #[pyclass(module = false, name = "tuple_iterator", traverse)] #[derive(Debug)] pub(crate) struct PyTupleIterator { @@ -500,53 +518,75 @@ pub(crate) fn init(context: &Context) { PyTupleIterator::extend_class(context, context.types.tuple_iterator_type); } -pub struct PyTupleTyped { +#[repr(transparent)] +pub struct PyTupleTyped { // SAFETY INVARIANT: T must be repr(transparent) over PyObjectRef, and the // elements must be logically valid when transmuted to T - tuple: PyTupleRef, - _marker: PhantomData>, + tuple: PyTuple, + _marker: PhantomData, } -unsafe impl Traverse for PyTupleTyped +unsafe impl Traverse for PyTupleTyped where - T: TransmuteFromObject + Traverse, + R: TransmuteFromObject, { fn traverse(&self, tracer_fn: &mut TraverseFn<'_>) { self.tuple.traverse(tracer_fn); } } -impl TryFromObject for PyTupleTyped { - fn try_from_object(vm: &VirtualMachine, obj: PyObjectRef) -> PyResult { - let tuple = PyTupleRef::try_from_object(vm, obj)?; - for elem in &*tuple { - T::check(vm, elem)? 
- } - // SAFETY: the contract of TransmuteFromObject upholds the variant on `tuple` - Ok(Self { - tuple, - _marker: PhantomData, - }) +impl MaybeTraverse for PyTupleTyped { + const IS_TRACE: bool = true; + fn try_traverse(&self, tracer_fn: &mut TraverseFn<'_>) { + self.traverse(tracer_fn); } } -impl AsRef<[T]> for PyTupleTyped { - fn as_ref(&self) -> &[T] { - self.as_slice() +impl PyTupleTyped> { + pub fn new_ref(elements: Vec>, ctx: &Context) -> PyRef { + // SAFETY: PyRef has the same layout as PyObjectRef + unsafe { + let elements: Vec = + std::mem::transmute::>, Vec>(elements); + let tuple = PyTuple::new_ref(elements, ctx); + tuple.into_typed_unchecked::() + } } } -impl PyTupleTyped { - pub fn empty(vm: &VirtualMachine) -> Self { - Self { - tuple: vm.ctx.empty_tuple.clone(), - _marker: PhantomData, +impl PyRef>> { + pub fn into_untyped(self) -> PyRef { + // SAFETY: PyTupleTyped is transparent over PyTuple + unsafe { std::mem::transmute::>>, PyRef>(self) } + } + + pub fn try_from_untyped(tuple: PyTupleRef, vm: &VirtualMachine) -> PyResult { + // Check that all elements are of the correct type + for elem in tuple.as_slice() { + as TransmuteFromObject>::check(vm, elem)?; } + // SAFETY: We just verified all elements are of type T, and PyTupleTyped has the same layout as PyTuple + Ok(unsafe { std::mem::transmute::, PyRef>>>(tuple) }) } +} +impl Py>> { + pub fn as_untyped(&self) -> &Py { + // SAFETY: PyTupleTyped is transparent over PyTuple + unsafe { std::mem::transmute::<&Py>>, &Py>(self) } + } +} + +impl AsRef<[PyRef]> for PyTupleTyped> { + fn as_ref(&self) -> &[PyRef] { + self.as_slice() + } +} + +impl PyTupleTyped> { #[inline] - pub fn as_slice(&self) -> &[T] { - unsafe { &*(self.tuple.as_slice() as *const [PyObjectRef] as *const [T]) } + pub fn as_slice(&self) -> &[PyRef] { + unsafe { &*(self.tuple.as_slice() as *const [PyObjectRef] as *const [PyRef]) } } #[inline] @@ -560,32 +600,16 @@ impl PyTupleTyped { } } -impl Clone for PyTupleTyped { - fn clone(&self) -> 
Self { - Self { - tuple: self.tuple.clone(), - _marker: PhantomData, - } - } -} - -impl fmt::Debug for PyTupleTyped { +impl fmt::Debug for PyTupleTyped { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - self.as_slice().fmt(f) - } -} - -impl From> for PyTupleRef { - #[inline] - fn from(tup: PyTupleTyped) -> Self { - tup.tuple + self.tuple.as_slice().fmt(f) } } -impl ToPyObject for PyTupleTyped { +impl From>>> for PyTupleRef { #[inline] - fn to_pyobject(self, _vm: &VirtualMachine) -> PyObjectRef { - self.tuple.into() + fn from(tup: PyRef>>) -> Self { + tup.into_untyped() } } diff --git a/vm/src/builtins/type.rs b/vm/src/builtins/type.rs index 1e18d6fd63..5a8f853bf1 100644 --- a/vm/src/builtins/type.rs +++ b/vm/src/builtins/type.rs @@ -62,7 +62,7 @@ unsafe impl crate::object::Traverse for PyType { pub struct HeapTypeExt { pub name: PyRwLock, pub qualname: PyRwLock, - pub slots: Option>, + pub slots: Option>>, pub sequence_methods: PySequenceMethods, pub mapping_methods: PyMappingMethods, } @@ -1041,15 +1041,13 @@ impl Constructor for PyType { // TODO: Flags is currently initialized with HAS_DICT. Should be // updated when __slots__ are supported (toggling the flag off if // a class has __slots__ defined). - let heaptype_slots: Option> = + let heaptype_slots: Option>> = if let Some(x) = attributes.get(identifier!(vm, __slots__)) { - Some(if x.to_owned().class().is(vm.ctx.types.str_type) { - PyTupleTyped::::try_from_object( - vm, - vec![x.to_owned()].into_pytuple(vm).into(), - )? + let slots = if x.class().is(vm.ctx.types.str_type) { + let x = unsafe { x.downcast_unchecked_ref::() }; + PyTupleTyped::new_ref(vec![x.to_owned()], &vm.ctx) } else { - let iter = x.to_owned().get_iter(vm)?; + let iter = x.get_iter(vm)?; let elements = { let mut elements = Vec::new(); while let PyIterReturn::Return(element) = iter.next(vm)? { @@ -1057,8 +1055,10 @@ impl Constructor for PyType { } elements }; - PyTupleTyped::::try_from_object(vm, elements.into_pytuple(vm).into())? 
- }) + let tuple = elements.into_pytuple(vm); + tuple.try_into_typed(vm)? + }; + Some(slots) } else { None }; @@ -1082,7 +1082,7 @@ impl Constructor for PyType { let heaptype_ext = HeapTypeExt { name: PyRwLock::new(name), qualname: PyRwLock::new(qualname), - slots: heaptype_slots.to_owned(), + slots: heaptype_slots.clone(), sequence_methods: PySequenceMethods::default(), mapping_methods: PyMappingMethods::default(), }; diff --git a/vm/src/convert/try_from.rs b/vm/src/convert/try_from.rs index d2d83b36e7..a875ffa231 100644 --- a/vm/src/convert/try_from.rs +++ b/vm/src/convert/try_from.rs @@ -78,12 +78,12 @@ where #[inline] fn try_from_object(vm: &VirtualMachine, obj: PyObjectRef) -> PyResult { let class = T::class(&vm.ctx); - if obj.fast_isinstance(class) { + let result = if obj.fast_isinstance(class) { obj.downcast() - .map_err(|obj| vm.new_downcast_runtime_error(class, &obj)) } else { - Err(vm.new_downcast_type_error(class, &obj)) - } + Err(obj) + }; + result.map_err(|obj| vm.new_downcast_type_error(class, &obj)) } } diff --git a/vm/src/frame.rs b/vm/src/frame.rs index a3e31c5c2b..460ba4392e 100644 --- a/vm/src/frame.rs +++ b/vm/src/frame.rs @@ -7,7 +7,7 @@ use crate::{ PySlice, PyStr, PyStrInterned, PyStrRef, PyTraceback, PyType, asyncgenerator::PyAsyncGenWrappedValue, function::{PyCell, PyCellRef, PyFunction}, - tuple::{PyTuple, PyTupleRef, PyTupleTyped}, + tuple::{PyTuple, PyTupleRef}, }, bytecode, convert::{IntoObject, ToPyResult}, @@ -1346,11 +1346,14 @@ impl ExecutingFrame<'_> { #[cfg_attr(feature = "flame-it", flame("Frame"))] fn import(&mut self, vm: &VirtualMachine, module_name: Option<&Py>) -> PyResult<()> { let module_name = module_name.unwrap_or(vm.ctx.empty_str); - let from_list = >>::try_from_object(vm, self.pop_value())? - .unwrap_or_else(|| PyTupleTyped::empty(vm)); + let top = self.pop_value(); + let from_list = match >::try_from_object(vm, top)? 
{ + Some(from_list) => from_list.try_into_typed::(vm)?, + None => vm.ctx.empty_tuple_typed().to_owned(), + }; let level = usize::try_from_object(vm, self.pop_value())?; - let module = vm.import_from(module_name, from_list, level)?; + let module = vm.import_from(module_name, &from_list, level)?; self.push_value(module); Ok(()) @@ -1839,7 +1842,8 @@ impl ExecutingFrame<'_> { .expect("Second to top value on the stack must be a code object"); let closure = if flags.contains(bytecode::MakeFunctionFlags::CLOSURE) { - Some(PyTupleTyped::try_from_object(vm, self.pop_value()).unwrap()) + let tuple = PyTupleRef::try_from_object(vm, self.pop_value()).unwrap(); + Some(tuple.try_into_typed(vm).expect("This is a compiler bug")) } else { None }; diff --git a/vm/src/object/core.rs b/vm/src/object/core.rs index b4b9557f2a..5012855133 100644 --- a/vm/src/object/core.rs +++ b/vm/src/object/core.rs @@ -15,7 +15,7 @@ use super::{ ext::{AsObject, PyRefExact, PyResult}, payload::PyObjectPayload, }; -use crate::object::traverse::{Traverse, TraverseFn}; +use crate::object::traverse::{MaybeTraverse, Traverse, TraverseFn}; use crate::object::traverse_object::PyObjVTable; use crate::{ builtins::{PyDictRef, PyType, PyTypeRef}, @@ -121,7 +121,7 @@ impl fmt::Debug for PyInner { } } -unsafe impl Traverse for Py { +unsafe impl Traverse for Py { /// DO notice that call `trace` on `Py` means apply `tracer_fn` on `Py`'s children, /// not like call `trace` on `PyRef` which apply `tracer_fn` on `PyRef` itself fn traverse(&self, tracer_fn: &mut TraverseFn<'_>) { @@ -557,7 +557,7 @@ impl PyObjectRef { /// # Safety /// T must be the exact payload type #[inline(always)] - pub unsafe fn downcast_unchecked(self) -> PyRef { + pub unsafe fn downcast_unchecked(self) -> PyRef { // PyRef::from_obj_unchecked(self) // manual impl to avoid assertion let obj = ManuallyDrop::new(self); @@ -893,7 +893,7 @@ impl fmt::Debug for PyObjectRef { } #[repr(transparent)] -pub struct Py(PyInner); +pub struct Py(PyInner); impl Py 
{ pub fn downgrade( @@ -908,7 +908,7 @@ impl Py { } } -impl ToOwned for Py { +impl ToOwned for Py { type Owned = PyRef; #[inline(always)] @@ -920,7 +920,7 @@ impl ToOwned for Py { } } -impl Deref for Py { +impl Deref for Py { type Target = T; #[inline(always)] @@ -984,24 +984,24 @@ impl fmt::Debug for Py { /// situations (such as when implementing in-place methods such as `__iadd__`) /// where a reference to the same object must be returned. #[repr(transparent)] -pub struct PyRef { +pub struct PyRef { ptr: NonNull>, } cfg_if::cfg_if! { if #[cfg(feature = "threading")] { - unsafe impl Send for PyRef {} - unsafe impl Sync for PyRef {} + unsafe impl Send for PyRef {} + unsafe impl Sync for PyRef {} } } -impl fmt::Debug for PyRef { +impl fmt::Debug for PyRef { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { (**self).fmt(f) } } -impl Drop for PyRef { +impl Drop for PyRef { #[inline] fn drop(&mut self) { if self.0.ref_count.dec() { @@ -1010,7 +1010,7 @@ impl Drop for PyRef { } } -impl Clone for PyRef { +impl Clone for PyRef { #[inline(always)] fn clone(&self) -> Self { (**self).to_owned() @@ -1070,10 +1070,7 @@ where } } -impl From> for PyObjectRef -where - T: PyObjectPayload, -{ +impl From> for PyObjectRef { #[inline] fn from(value: PyRef) -> Self { let me = ManuallyDrop::new(value); @@ -1081,30 +1078,21 @@ where } } -impl Borrow> for PyRef -where - T: PyObjectPayload, -{ +impl Borrow> for PyRef { #[inline(always)] fn borrow(&self) -> &Py { self } } -impl AsRef> for PyRef -where - T: PyObjectPayload, -{ +impl AsRef> for PyRef { #[inline(always)] fn as_ref(&self) -> &Py { self } } -impl Deref for PyRef -where - T: PyObjectPayload, -{ +impl Deref for PyRef { type Target = Py; #[inline(always)] diff --git a/vm/src/object/traverse_object.rs b/vm/src/object/traverse_object.rs index ee32785950..281b0e56eb 100644 --- a/vm/src/object/traverse_object.rs +++ b/vm/src/object/traverse_object.rs @@ -3,7 +3,8 @@ use std::fmt; use crate::{ PyObject, object::{ - Erased, 
InstanceDict, PyInner, PyObjectPayload, debug_obj, drop_dealloc_obj, try_trace_obj, + Erased, InstanceDict, MaybeTraverse, PyInner, PyObjectPayload, debug_obj, drop_dealloc_obj, + try_trace_obj, }, }; @@ -56,7 +57,7 @@ unsafe impl Traverse for PyInner { } } -unsafe impl Traverse for PyInner { +unsafe impl Traverse for PyInner { /// Type is known, so we can call `try_trace` directly instead of using erased type vtable fn traverse(&self, tracer_fn: &mut TraverseFn<'_>) { // 1. trace `dict` and `slots` field(`typ` can't trace for it's a AtomicRef while is leaked by design) diff --git a/vm/src/vm/context.rs b/vm/src/vm/context.rs index d35b5b7f7e..4c673831e0 100644 --- a/vm/src/vm/context.rs +++ b/vm/src/vm/context.rs @@ -11,6 +11,7 @@ use crate::{ }, getset::PyGetSet, object, pystr, + tuple::PyTupleTyped, type_::PyAttributes, }, class::{PyClassImpl, StaticType}, @@ -373,6 +374,12 @@ impl Context { self.not_implemented.clone().into() } + #[inline] + pub fn empty_tuple_typed(&self) -> &Py> { + let py: &Py = &self.empty_tuple; + unsafe { std::mem::transmute(py) } + } + // universal pyref constructor pub fn new_pyref(&self, value: T) -> PyRef

where diff --git a/vm/src/vm/mod.rs b/vm/src/vm/mod.rs index 4a319c9635..dbfa2147b3 100644 --- a/vm/src/vm/mod.rs +++ b/vm/src/vm/mod.rs @@ -599,7 +599,7 @@ impl VirtualMachine { #[inline] pub fn import<'a>(&self, module_name: impl AsPyStr<'a>, level: usize) -> PyResult { let module_name = module_name.as_pystr(&self.ctx); - let from_list = PyTupleTyped::empty(self); + let from_list = self.ctx.empty_tuple_typed(); self.import_inner(module_name, from_list, level) } @@ -609,7 +609,7 @@ impl VirtualMachine { pub fn import_from<'a>( &self, module_name: impl AsPyStr<'a>, - from_list: PyTupleTyped, + from_list: &Py>, level: usize, ) -> PyResult { let module_name = module_name.as_pystr(&self.ctx); @@ -619,7 +619,7 @@ impl VirtualMachine { fn import_inner( &self, module: &Py, - from_list: PyTupleTyped, + from_list: &Py>, level: usize, ) -> PyResult { // if the import inputs seem weird, e.g a package import or something, rather than just @@ -657,7 +657,7 @@ impl VirtualMachine { } else { (None, None) }; - let from_list = from_list.to_pyobject(self); + let from_list: PyObjectRef = from_list.to_owned().into(); import_func .call((module.to_owned(), globals, locals, from_list, level), self) .inspect_err(|exc| import::remove_importlib_frames(self, exc)) From 6342ad4fa7ec073aa85967e7f2a4c6bcfd2df103 Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Mon, 14 Jul 2025 14:27:31 +0900 Subject: [PATCH 042/176] Fully integrate PyTupleTyped into PyTuple --- vm/src/builtins/function.rs | 8 +- vm/src/builtins/tuple.rs | 213 +++++++++++++----------------------- vm/src/builtins/type.rs | 10 +- vm/src/vm/context.rs | 3 +- vm/src/vm/mod.rs | 9 +- 5 files changed, 92 insertions(+), 151 deletions(-) diff --git a/vm/src/builtins/function.rs b/vm/src/builtins/function.rs index 45917adcf2..16cb3e420f 100644 --- a/vm/src/builtins/function.rs +++ b/vm/src/builtins/function.rs @@ -2,8 +2,8 @@ mod jit; use super::{ - PyAsyncGen, PyCode, PyCoroutine, PyDictRef, PyGenerator, PyStr, PyStrRef, PyTupleRef, 
PyType, - PyTypeRef, tuple::PyTupleTyped, + PyAsyncGen, PyCode, PyCoroutine, PyDictRef, PyGenerator, PyStr, PyStrRef, PyTuple, PyTupleRef, + PyType, PyTypeRef, }; #[cfg(feature = "jit")] use crate::common::lock::OnceCell; @@ -31,7 +31,7 @@ pub struct PyFunction { code: PyRef, globals: PyDictRef, builtins: PyObjectRef, - closure: Option>>, + closure: Option>>, defaults_and_kwdefaults: PyMutex<(Option, Option)>, name: PyMutex, qualname: PyMutex, @@ -59,7 +59,7 @@ impl PyFunction { pub(crate) fn new( code: PyRef, globals: PyDictRef, - closure: Option>>, + closure: Option>>, defaults: Option, kw_only_defaults: Option, qualname: PyStrRef, diff --git a/vm/src/builtins/tuple.rs b/vm/src/builtins/tuple.rs index 9f589547f0..2c3255b249 100644 --- a/vm/src/builtins/tuple.rs +++ b/vm/src/builtins/tuple.rs @@ -3,7 +3,6 @@ use crate::common::{ hash::{PyHash, PyUHash}, lock::PyMutex, }; -use crate::object::{MaybeTraverse, Traverse, TraverseFn}; use crate::{ AsObject, Context, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, TryFromObject, atomic_func, @@ -22,14 +21,14 @@ use crate::{ utils::collection_repr, vm::VirtualMachine, }; -use std::{fmt, marker::PhantomData, sync::LazyLock}; +use std::{fmt, sync::LazyLock}; #[pyclass(module = false, name = "tuple", traverse)] -pub struct PyTuple { - elements: Box<[PyObjectRef]>, +pub struct PyTuple { + elements: Box<[R]>, } -impl fmt::Debug for PyTuple { +impl fmt::Debug for PyTuple { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { // TODO: implement more informational, non-recursive Debug formatter f.write_str("tuple") @@ -140,39 +139,60 @@ impl Constructor for PyTuple { } } -impl AsRef<[PyObjectRef]> for PyTuple { - fn as_ref(&self) -> &[PyObjectRef] { - self.as_slice() +impl AsRef<[R]> for PyTuple { + fn as_ref(&self) -> &[R] { + &self.elements } } -impl std::ops::Deref for PyTuple { - type Target = [PyObjectRef]; +impl std::ops::Deref for PyTuple { + type Target = [R]; - fn deref(&self) -> &[PyObjectRef] { - 
self.as_slice() + fn deref(&self) -> &[R] { + &self.elements } } -impl<'a> std::iter::IntoIterator for &'a PyTuple { - type Item = &'a PyObjectRef; - type IntoIter = std::slice::Iter<'a, PyObjectRef>; +impl<'a, R> std::iter::IntoIterator for &'a PyTuple { + type Item = &'a R; + type IntoIter = std::slice::Iter<'a, R>; fn into_iter(self) -> Self::IntoIter { self.iter() } } -impl<'a> std::iter::IntoIterator for &'a Py { - type Item = &'a PyObjectRef; - type IntoIter = std::slice::Iter<'a, PyObjectRef>; +impl<'a, R> std::iter::IntoIterator for &'a Py> { + type Item = &'a R; + type IntoIter = std::slice::Iter<'a, R>; fn into_iter(self) -> Self::IntoIter { self.iter() } } -impl PyTuple { +impl PyTuple { + pub const fn as_slice(&self) -> &[R] { + &self.elements + } + + #[inline] + pub fn len(&self) -> usize { + self.elements.len() + } + + #[inline] + pub fn is_empty(&self) -> bool { + self.elements.is_empty() + } + + #[inline] + pub fn iter(&self) -> std::slice::Iter<'_, R> { + self.elements.iter() + } +} + +impl PyTuple { pub fn new_ref(elements: Vec, ctx: &Context) -> PyRef { if elements.is_empty() { ctx.empty_tuple.clone() @@ -189,10 +209,6 @@ impl PyTuple { Self { elements } } - pub const fn as_slice(&self) -> &[PyObjectRef] { - &self.elements - } - fn repeat(zelf: PyRef, value: isize, vm: &VirtualMachine) -> PyResult> { Ok(if zelf.elements.is_empty() || value == 0 { vm.ctx.empty_tuple.clone() @@ -214,6 +230,18 @@ impl PyTuple { } } +impl PyTuple> { + pub fn new_ref_typed(elements: Vec>, ctx: &Context) -> PyRef>> { + // SAFETY: PyRef has the same layout as PyObjectRef + unsafe { + let elements: Vec = + std::mem::transmute::>, Vec>(elements); + let tuple = PyTuple::::new_ref(elements, ctx); + std::mem::transmute::, PyRef>>>(tuple) + } + } +} + #[pyclass( flags(BASETYPE), with( @@ -272,11 +300,6 @@ impl PyTuple { self.elements.len() } - #[inline] - pub const fn is_empty(&self) -> bool { - self.elements.is_empty() - } - #[pymethod(name = "__rmul__")] #[pymethod] fn 
__mul__(zelf: PyRef, value: ArgSize, vm: &VirtualMachine) -> PyResult> { @@ -449,21 +472,38 @@ impl Representable for PyTuple { } } -impl PyRef { +impl PyRef> { pub fn try_into_typed( self, vm: &VirtualMachine, - ) -> PyResult>>> { - PyRef::>>::try_from_untyped(self, vm) + ) -> PyResult>>> { + // Check that all elements are of the correct type + for elem in self.as_slice() { + as TransmuteFromObject>::check(vm, elem)?; + } + // SAFETY: We just verified all elements are of type T + Ok(unsafe { std::mem::transmute::, PyRef>>>(self) }) + } +} + +impl PyRef>> { + pub fn into_untyped(self) -> PyRef { + // SAFETY: PyTuple> has the same layout as PyTuple + unsafe { std::mem::transmute::>>, PyRef>(self) } } - /// # Safety - /// - /// The caller must ensure that all elements in the tuple are valid instances - /// of type `T` before calling this method. This is typically verified by - /// calling `try_into_typed` first. - unsafe fn into_typed_unchecked(self) -> PyRef>> { - let obj: PyObjectRef = self.into(); - unsafe { obj.downcast_unchecked::>>() } +} + +impl Py>> { + pub fn as_untyped(&self) -> &Py { + // SAFETY: PyTuple> has the same layout as PyTuple + unsafe { std::mem::transmute::<&Py>>, &Py>(self) } + } +} + +impl From>>> for PyTupleRef { + #[inline] + fn from(tup: PyRef>>) -> Self { + tup.into_untyped() } } @@ -518,101 +558,6 @@ pub(crate) fn init(context: &Context) { PyTupleIterator::extend_class(context, context.types.tuple_iterator_type); } -#[repr(transparent)] -pub struct PyTupleTyped { - // SAFETY INVARIANT: T must be repr(transparent) over PyObjectRef, and the - // elements must be logically valid when transmuted to T - tuple: PyTuple, - _marker: PhantomData, -} - -unsafe impl Traverse for PyTupleTyped -where - R: TransmuteFromObject, -{ - fn traverse(&self, tracer_fn: &mut TraverseFn<'_>) { - self.tuple.traverse(tracer_fn); - } -} - -impl MaybeTraverse for PyTupleTyped { - const IS_TRACE: bool = true; - fn try_traverse(&self, tracer_fn: &mut TraverseFn<'_>) { 
- self.traverse(tracer_fn); - } -} - -impl PyTupleTyped> { - pub fn new_ref(elements: Vec>, ctx: &Context) -> PyRef { - // SAFETY: PyRef has the same layout as PyObjectRef - unsafe { - let elements: Vec = - std::mem::transmute::>, Vec>(elements); - let tuple = PyTuple::new_ref(elements, ctx); - tuple.into_typed_unchecked::() - } - } -} - -impl PyRef>> { - pub fn into_untyped(self) -> PyRef { - // SAFETY: PyTupleTyped is transparent over PyTuple - unsafe { std::mem::transmute::>>, PyRef>(self) } - } - - pub fn try_from_untyped(tuple: PyTupleRef, vm: &VirtualMachine) -> PyResult { - // Check that all elements are of the correct type - for elem in tuple.as_slice() { - as TransmuteFromObject>::check(vm, elem)?; - } - // SAFETY: We just verified all elements are of type T, and PyTupleTyped has the same layout as PyTuple - Ok(unsafe { std::mem::transmute::, PyRef>>>(tuple) }) - } -} - -impl Py>> { - pub fn as_untyped(&self) -> &Py { - // SAFETY: PyTupleTyped is transparent over PyTuple - unsafe { std::mem::transmute::<&Py>>, &Py>(self) } - } -} - -impl AsRef<[PyRef]> for PyTupleTyped> { - fn as_ref(&self) -> &[PyRef] { - self.as_slice() - } -} - -impl PyTupleTyped> { - #[inline] - pub fn as_slice(&self) -> &[PyRef] { - unsafe { &*(self.tuple.as_slice() as *const [PyObjectRef] as *const [PyRef]) } - } - - #[inline] - pub fn len(&self) -> usize { - self.tuple.len() - } - - #[inline] - pub fn is_empty(&self) -> bool { - self.tuple.is_empty() - } -} - -impl fmt::Debug for PyTupleTyped { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - self.tuple.as_slice().fmt(f) - } -} - -impl From>>> for PyTupleRef { - #[inline] - fn from(tup: PyRef>>) -> Self { - tup.into_untyped() - } -} - pub(super) fn tuple_hash(elements: &[PyObjectRef], vm: &VirtualMachine) -> PyResult { #[cfg(target_pointer_width = "64")] const PRIME1: PyUHash = 11400714785074694791; diff --git a/vm/src/builtins/type.rs b/vm/src/builtins/type.rs index 5a8f853bf1..f2a4fde3b9 100644 --- 
a/vm/src/builtins/type.rs +++ b/vm/src/builtins/type.rs @@ -1,5 +1,5 @@ use super::{ - PyClassMethod, PyDictRef, PyList, PyStr, PyStrInterned, PyStrRef, PyTuple, PyTupleRef, PyWeak, + PyClassMethod, PyDictRef, PyList, PyStr, PyStrInterned, PyStrRef, PyTupleRef, PyWeak, mappingproxy::PyMappingProxy, object, union_, }; use crate::{ @@ -12,7 +12,7 @@ use crate::{ PyMemberDescriptor, }, function::PyCellRef, - tuple::{IntoPyTuple, PyTupleTyped}, + tuple::{IntoPyTuple, PyTuple}, }, class::{PyClassImpl, StaticType}, common::{ @@ -62,7 +62,7 @@ unsafe impl crate::object::Traverse for PyType { pub struct HeapTypeExt { pub name: PyRwLock, pub qualname: PyRwLock, - pub slots: Option>>, + pub slots: Option>>, pub sequence_methods: PySequenceMethods, pub mapping_methods: PyMappingMethods, } @@ -1041,11 +1041,11 @@ impl Constructor for PyType { // TODO: Flags is currently initialized with HAS_DICT. Should be // updated when __slots__ are supported (toggling the flag off if // a class has __slots__ defined). 
- let heaptype_slots: Option>> = + let heaptype_slots: Option>> = if let Some(x) = attributes.get(identifier!(vm, __slots__)) { let slots = if x.class().is(vm.ctx.types.str_type) { let x = unsafe { x.downcast_unchecked_ref::() }; - PyTupleTyped::new_ref(vec![x.to_owned()], &vm.ctx) + PyTuple::new_ref_typed(vec![x.to_owned()], &vm.ctx) } else { let iter = x.get_iter(vm)?; let elements = { diff --git a/vm/src/vm/context.rs b/vm/src/vm/context.rs index 4c673831e0..6707288151 100644 --- a/vm/src/vm/context.rs +++ b/vm/src/vm/context.rs @@ -11,7 +11,6 @@ use crate::{ }, getset::PyGetSet, object, pystr, - tuple::PyTupleTyped, type_::PyAttributes, }, class::{PyClassImpl, StaticType}, @@ -375,7 +374,7 @@ impl Context { } #[inline] - pub fn empty_tuple_typed(&self) -> &Py> { + pub fn empty_tuple_typed(&self) -> &Py> { let py: &Py = &self.empty_tuple; unsafe { std::mem::transmute(py) } } diff --git a/vm/src/vm/mod.rs b/vm/src/vm/mod.rs index dbfa2147b3..498c7e39d1 100644 --- a/vm/src/vm/mod.rs +++ b/vm/src/vm/mod.rs @@ -20,10 +20,7 @@ use crate::{ AsObject, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, builtins::{ PyBaseExceptionRef, PyDictRef, PyInt, PyList, PyModule, PyStr, PyStrInterned, PyStrRef, - PyTypeRef, - code::PyCode, - pystr::AsPyStr, - tuple::{PyTuple, PyTupleTyped}, + PyTypeRef, code::PyCode, pystr::AsPyStr, tuple::PyTuple, }, codecs::CodecsRegistry, common::{hash::HashSecret, lock::PyMutex, rc::PyRc}, @@ -609,7 +606,7 @@ impl VirtualMachine { pub fn import_from<'a>( &self, module_name: impl AsPyStr<'a>, - from_list: &Py>, + from_list: &Py>, level: usize, ) -> PyResult { let module_name = module_name.as_pystr(&self.ctx); @@ -619,7 +616,7 @@ impl VirtualMachine { fn import_inner( &self, module: &Py, - from_list: &Py>, + from_list: &Py>, level: usize, ) -> PyResult { // if the import inputs seem weird, e.g a package import or something, rather than just From 406be9cd15b36e1952ca8eb56408e83281b2aa09 Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Mon, 
14 Jul 2025 20:12:22 +0900 Subject: [PATCH 043/176] Upgrade radium to 1.1.1 --- Cargo.lock | 5 +++-- Cargo.toml | 3 +-- example_projects/barebone/Cargo.toml | 1 - example_projects/frozen_stdlib/Cargo.toml | 1 - wasm/wasm-unknown-test/Cargo.toml | 1 - 5 files changed, 4 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c6dee806c7..50ec28b1ed 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1958,8 +1958,9 @@ checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5" [[package]] name = "radium" -version = "1.1.0" -source = "git+https://github.com/youknowone/ferrilab?branch=fix-nightly#4a301c3a223e096626a2773d1a1eed1fc4e21140" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1775bc532a9bfde46e26eba441ca1171b91608d14a3bae71fea371f18a00cffe" dependencies = [ "cfg-if", ] diff --git a/Cargo.toml b/Cargo.toml index 1fdc77d261..440855aba5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -82,7 +82,6 @@ opt-level = 3 lto = "thin" [patch.crates-io] -radium = { version = "1.1.0", git = "https://github.com/youknowone/ferrilab", branch = "fix-nightly" } # REDOX START, Uncomment when you want to compile/check with redoxer # REDOX END @@ -190,7 +189,7 @@ paste = "1.0.15" proc-macro2 = "1.0.93" pymath = "0.0.2" quote = "1.0.38" -radium = "1.1" +radium = "1.1.1" rand = "0.9" rand_core = { version = "0.9", features = ["os_rng"] } rustix = { version = "1.0", features = ["event"] } diff --git a/example_projects/barebone/Cargo.toml b/example_projects/barebone/Cargo.toml index a993277f31..8bc49c237f 100644 --- a/example_projects/barebone/Cargo.toml +++ b/example_projects/barebone/Cargo.toml @@ -9,4 +9,3 @@ rustpython-vm = { path = "../../vm", default-features = false } [workspace] [patch.crates-io] -radium = { version = "1.1.0", git = "https://github.com/youknowone/ferrilab", branch = "fix-nightly" } diff --git a/example_projects/frozen_stdlib/Cargo.toml b/example_projects/frozen_stdlib/Cargo.toml index 
be1b1eb16c..78a88988d8 100644 --- a/example_projects/frozen_stdlib/Cargo.toml +++ b/example_projects/frozen_stdlib/Cargo.toml @@ -11,4 +11,3 @@ rustpython-pylib = { path = "../../pylib", default-features = false, features = [workspace] [patch.crates-io] -radium = { version = "1.1.0", git = "https://github.com/youknowone/ferrilab", branch = "fix-nightly" } diff --git a/wasm/wasm-unknown-test/Cargo.toml b/wasm/wasm-unknown-test/Cargo.toml index 5945f69006..ed8c9fcb02 100644 --- a/wasm/wasm-unknown-test/Cargo.toml +++ b/wasm/wasm-unknown-test/Cargo.toml @@ -13,4 +13,3 @@ rustpython-vm = { path = "../../vm", default-features = false, features = ["comp [workspace] [patch.crates-io] -radium = { version = "1.1.0", git = "https://github.com/youknowone/ferrilab", branch = "fix-nightly" } From dd4f0c3a9f48a3ad28b46a11c266112465c39d98 Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Mon, 14 Jul 2025 20:21:31 +0900 Subject: [PATCH 044/176] fix lint --- vm/src/vm/mod.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/vm/src/vm/mod.rs b/vm/src/vm/mod.rs index 498c7e39d1..6993897598 100644 --- a/vm/src/vm/mod.rs +++ b/vm/src/vm/mod.rs @@ -14,8 +14,6 @@ mod vm_new; mod vm_object; mod vm_ops; -#[cfg(not(feature = "stdio"))] -use crate::builtins::PyNone; use crate::{ AsObject, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, builtins::{ @@ -337,7 +335,8 @@ impl VirtualMachine { Ok(stdio) }; #[cfg(not(feature = "stdio"))] - let make_stdio = |_name, _fd, _write| Ok(PyNone.into_pyobject(self)); + let make_stdio = + |_name, _fd, _write| Ok(crate::builtins::PyNone.into_pyobject(self)); let set_stdio = |name, fd, write| { let stdio = make_stdio(name, fd, write)?; From fd35c7a70634b78f85f2b3278f8da57b52ef6734 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" <69878+youknowone@users.noreply.github.com> Date: Mon, 14 Jul 2025 22:54:44 +0900 Subject: [PATCH 045/176] Impl Drop for PyAtomicRef (#5970) --- vm/src/object/ext.rs | 13 +++++++++++++ 1 file changed, 13 
insertions(+) diff --git a/vm/src/object/ext.rs b/vm/src/object/ext.rs index 2815d2b20e..1e2b78d9a9 100644 --- a/vm/src/object/ext.rs +++ b/vm/src/object/ext.rs @@ -245,6 +245,19 @@ pub struct PyAtomicRef { _phantom: PhantomData, } +impl Drop for PyAtomicRef { + fn drop(&mut self) { + // SAFETY: We are dropping the atomic reference, so we can safely + // release the pointer. + unsafe { + let ptr = Radium::swap(&self.inner, null_mut(), Ordering::Relaxed); + if let Some(ptr) = NonNull::::new(ptr.cast()) { + let _: PyObjectRef = PyObjectRef::from_raw(ptr); + } + } + } +} + cfg_if::cfg_if! { if #[cfg(feature = "threading")] { unsafe impl Send for PyAtomicRef {} From ed433837b309d00f307a1e453fd8d60efce441bd Mon Sep 17 00:00:00 2001 From: Jiseok CHOI Date: Tue, 15 Jul 2025 00:54:42 +0900 Subject: [PATCH 046/176] Introduce PyUtf8Str and fix(sqlite): validate surrogates in SQL statements (#5969) * fix(sqlite): validate surrogates in SQL statements * Add `PyUtf8Str` wrapper for safe conversion --- Lib/test/test_sqlite3/test_regression.py | 2 - stdlib/src/sqlite.rs | 9 +++-- vm/src/builtins/str.rs | 49 +++++++++++++++++++++--- 3 files changed, 48 insertions(+), 12 deletions(-) diff --git a/Lib/test/test_sqlite3/test_regression.py b/Lib/test/test_sqlite3/test_regression.py index dfcf3b11f5..870958ceee 100644 --- a/Lib/test/test_sqlite3/test_regression.py +++ b/Lib/test/test_sqlite3/test_regression.py @@ -343,8 +343,6 @@ def test_null_character(self): self.assertRaisesRegex(sqlite.ProgrammingError, "null char", cur.execute, query) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_surrogates(self): con = sqlite.connect(":memory:") self.assertRaises(UnicodeEncodeError, con, "select '\ud8ff'") diff --git a/stdlib/src/sqlite.rs b/stdlib/src/sqlite.rs index ce84ac2988..4e9620eeab 100644 --- a/stdlib/src/sqlite.rs +++ b/stdlib/src/sqlite.rs @@ -844,7 +844,7 @@ mod _sqlite { type Args = (PyStrRef,); fn call(zelf: &Py, args: Self::Args, vm: &VirtualMachine) -> PyResult { - if 
let Some(stmt) = Statement::new(zelf, &args.0, vm)? { + if let Some(stmt) = Statement::new(zelf, args.0, vm)? { Ok(stmt.into_ref(&vm.ctx).into()) } else { Ok(vm.ctx.none()) @@ -1480,7 +1480,7 @@ mod _sqlite { stmt.lock().reset(); } - let Some(stmt) = Statement::new(&zelf.connection, &sql, vm)? else { + let Some(stmt) = Statement::new(&zelf.connection, sql, vm)? else { drop(inner); return Ok(zelf); }; @@ -1552,7 +1552,7 @@ mod _sqlite { stmt.lock().reset(); } - let Some(stmt) = Statement::new(&zelf.connection, &sql, vm)? else { + let Some(stmt) = Statement::new(&zelf.connection, sql, vm)? else { drop(inner); return Ok(zelf); }; @@ -2291,9 +2291,10 @@ mod _sqlite { impl Statement { fn new( connection: &Connection, - sql: &PyStr, + sql: PyStrRef, vm: &VirtualMachine, ) -> PyResult> { + let sql = sql.try_into_utf8(vm)?; let sql_cstr = sql.to_cstring(vm)?; let sql_len = sql.byte_len() + 1; diff --git a/vm/src/builtins/str.rs b/vm/src/builtins/str.rs index 9f86da3da0..73349c6141 100644 --- a/vm/src/builtins/str.rs +++ b/vm/src/builtins/str.rs @@ -37,8 +37,8 @@ use rustpython_common::{ str::DeduceStrKind, wtf8::{CodePoint, Wtf8, Wtf8Buf, Wtf8Chunk}, }; -use std::sync::LazyLock; use std::{borrow::Cow, char, fmt, ops::Range}; +use std::{mem, sync::LazyLock}; use unic_ucd_bidi::BidiClass; use unic_ucd_category::GeneralCategory; use unic_ucd_ident::{is_xid_continue, is_xid_start}; @@ -80,6 +80,30 @@ impl fmt::Debug for PyStr { } } +#[repr(transparent)] +#[derive(Debug)] +pub struct PyUtf8Str(PyStr); + +// TODO: Remove this Deref which may hide missing optimized methods of PyUtf8Str +impl std::ops::Deref for PyUtf8Str { + type Target = PyStr; + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl PyUtf8Str { + /// Returns the underlying string slice. + pub fn as_str(&self) -> &str { + debug_assert!( + self.0.is_utf8(), + "PyUtf8Str invariant violated: inner string is not valid UTF-8" + ); + // Safety: This is safe because the type invariant guarantees UTF-8 validity. 
+ unsafe { self.0.to_str().unwrap_unchecked() } + } +} + impl AsRef for PyStr { #[track_caller] // <- can remove this once it doesn't panic fn as_ref(&self) -> &str { @@ -433,21 +457,29 @@ impl PyStr { self.data.as_str() } - pub fn try_to_str(&self, vm: &VirtualMachine) -> PyResult<&str> { - self.to_str().ok_or_else(|| { + fn ensure_valid_utf8(&self, vm: &VirtualMachine) -> PyResult<()> { + if self.is_utf8() { + Ok(()) + } else { let start = self .as_wtf8() .code_points() .position(|c| c.to_char().is_none()) .unwrap(); - vm.new_unicode_encode_error_real( + Err(vm.new_unicode_encode_error_real( identifier!(vm, utf_8).to_owned(), vm.ctx.new_str(self.data.clone()), start, start + 1, vm.ctx.new_str("surrogates not allowed"), - ) - }) + )) + } + } + + pub fn try_to_str(&self, vm: &VirtualMachine) -> PyResult<&str> { + self.ensure_valid_utf8(vm)?; + // SAFETY: ensure_valid_utf8 passed, so unwrap is safe. + Ok(unsafe { self.to_str().unwrap_unchecked() }) } pub fn to_string_lossy(&self) -> Cow<'_, str> { @@ -1486,6 +1518,11 @@ impl PyStrRef { s.push_wtf8(other); *self = PyStr::from(s).into_ref(&vm.ctx); } + + pub fn try_into_utf8(self, vm: &VirtualMachine) -> PyResult> { + self.ensure_valid_utf8(vm)?; + Ok(unsafe { mem::transmute::, PyRef>(self) }) + } } impl Representable for PyStr { From d4f85cf0737974cbae15f38d73cc0245ab59f0f9 Mon Sep 17 00:00:00 2001 From: Jiseok CHOI Date: Tue, 15 Jul 2025 01:45:42 +0900 Subject: [PATCH 047/176] Provide detailed error for circular `from` imports (#5972) --- Lib/test/test_import/__init__.py | 2 -- vm/src/frame.rs | 48 ++++++++++++++++++++++++++------ 2 files changed, 40 insertions(+), 10 deletions(-) diff --git a/Lib/test/test_import/__init__.py b/Lib/test/test_import/__init__.py index 89e5ec1534..44e7da1033 100644 --- a/Lib/test/test_import/__init__.py +++ b/Lib/test/test_import/__init__.py @@ -1380,8 +1380,6 @@ def test_crossreference2(self): self.assertIn('partially initialized module', errmsg) self.assertIn('circular import', 
errmsg) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_circular_from_import(self): with self.assertRaises(ImportError) as cm: import test.test_import.data.circular_imports.from_cycle1 diff --git a/vm/src/frame.rs b/vm/src/frame.rs index 460ba4392e..28a6ece4da 100644 --- a/vm/src/frame.rs +++ b/vm/src/frame.rs @@ -1363,19 +1363,38 @@ impl ExecutingFrame<'_> { fn import_from(&mut self, vm: &VirtualMachine, idx: bytecode::NameIdx) -> PyResult { let module = self.top_value(); let name = self.code.names[idx as usize]; - let err = || vm.new_import_error(format!("cannot import name '{name}'"), name.to_owned()); + // Load attribute, and transform any error into import error. if let Some(obj) = vm.get_attribute_opt(module.to_owned(), name)? { return Ok(obj); } // fallback to importing '{module.__name__}.{name}' from sys.modules - let mod_name = module - .get_attr(identifier!(vm, __name__), vm) - .map_err(|_| err())?; - let mod_name = mod_name.downcast::().map_err(|_| err())?; - let full_mod_name = format!("{mod_name}.{name}"); - let sys_modules = vm.sys_module.get_attr("modules", vm).map_err(|_| err())?; - sys_modules.get_item(&full_mod_name, vm).map_err(|_| err()) + let fallback_module = (|| { + let mod_name = module.get_attr(identifier!(vm, __name__), vm).ok()?; + let mod_name = mod_name.downcast_ref::()?; + let full_mod_name = format!("{mod_name}.{name}"); + let sys_modules = vm.sys_module.get_attr("modules", vm).ok()?; + sys_modules.get_item(&full_mod_name, vm).ok() + })(); + + if let Some(sub_module) = fallback_module { + return Ok(sub_module); + } + + if is_module_initializing(module, vm) { + let module_name = module + .get_attr(identifier!(vm, __name__), vm) + .ok() + .and_then(|n| n.downcast_ref::().map(|s| s.as_str().to_owned())) + .unwrap_or_else(|| "".to_owned()); + + let msg = format!( + "cannot import name '{name}' from partially initialized module '{module_name}' (most likely due to a circular import)", + ); + Err(vm.new_import_error(msg, 
name.to_owned())) + } else { + Err(vm.new_import_error(format!("cannot import name '{name}'"), name.to_owned())) + } } #[cfg_attr(feature = "flame-it", flame("Frame"))] @@ -2372,3 +2391,16 @@ impl fmt::Debug for Frame { ) } } + +fn is_module_initializing(module: &PyObject, vm: &VirtualMachine) -> bool { + let Ok(spec) = module.get_attr(&vm.ctx.new_str("__spec__"), vm) else { + return false; + }; + if vm.is_none(&spec) { + return false; + } + let Ok(initializing_attr) = spec.get_attr(&vm.ctx.new_str("_initializing"), vm) else { + return false; + }; + initializing_attr.try_to_bool(vm).unwrap_or(false) +} From 1d3603419efb83088dcf96bcebebe91ad53ea6a5 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" <69878+youknowone@users.noreply.github.com> Date: Tue, 15 Jul 2025 03:12:23 +0900 Subject: [PATCH 048/176] SetFunctionAttribute (#5968) * PyRef::into_non_null * SetFunctionAttribute * set_function_attribute * frame helper in PyFuncion * remove closure lock * cleanup unused args --- Lib/test/test_funcattrs.py | 2 - Lib/test/test_reprlib.py | 2 - Lib/test/test_typing.py | 2 - compiler/codegen/src/compile.rs | 347 +++++++++++++++++++--------- compiler/codegen/src/symboltable.rs | 1 + compiler/core/src/bytecode.rs | 22 +- jit/tests/common.rs | 45 +++- vm/src/builtins/function.rs | 146 ++++++++---- vm/src/frame.rs | 100 +++----- vm/src/object/core.rs | 23 +- vm/src/vm/mod.rs | 14 +- 11 files changed, 440 insertions(+), 264 deletions(-) diff --git a/Lib/test/test_funcattrs.py b/Lib/test/test_funcattrs.py index 5fd268fd90..3d5378092b 100644 --- a/Lib/test/test_funcattrs.py +++ b/Lib/test/test_funcattrs.py @@ -176,8 +176,6 @@ def test___name__(self): self.assertEqual(self.fi.a.__name__, 'a') self.cannot_set_attr(self.fi.a, "__name__", 'a', AttributeError) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test___qualname__(self): # PEP 3155 self.assertEqual(self.b.__qualname__, 'FuncAttrsTest.setUp..b') diff --git a/Lib/test/test_reprlib.py b/Lib/test/test_reprlib.py index 
396be4b104..738b48f562 100644 --- a/Lib/test/test_reprlib.py +++ b/Lib/test/test_reprlib.py @@ -176,8 +176,6 @@ def test_instance(self): self.assertTrue(s.endswith(">")) self.assertIn(s.find("..."), [12, 13]) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_lambda(self): r = repr(lambda x: x) self.assertTrue(r.startswith(". { Ok(()) } - fn enter_function( - &mut self, - name: &str, - parameters: &Parameters, - ) -> CompileResult { - let defaults: Vec<_> = std::iter::empty() - .chain(¶meters.posonlyargs) - .chain(¶meters.args) - .filter_map(|x| x.default.as_deref()) - .collect(); - let have_defaults = !defaults.is_empty(); - if have_defaults { - // Construct a tuple: - let size = defaults.len().to_u32(); - for element in &defaults { - self.compile_expression(element)?; - } - emit!(self, Instruction::BuildTuple { size }); - } - + fn enter_function(&mut self, name: &str, parameters: &Parameters) -> CompileResult<()> { // TODO: partition_in_place let mut kw_without_defaults = vec![]; let mut kw_with_defaults = vec![]; @@ -1513,31 +1494,6 @@ impl Compiler<'_> { } } - // let (kw_without_defaults, kw_with_defaults) = args.split_kwonlyargs(); - if !kw_with_defaults.is_empty() { - let default_kw_count = kw_with_defaults.len(); - for (arg, default) in kw_with_defaults.iter() { - self.emit_load_const(ConstantData::Str { - value: arg.name.as_str().into(), - }); - self.compile_expression(default)?; - } - emit!( - self, - Instruction::BuildMap { - size: default_kw_count.to_u32(), - } - ); - } - - let mut func_flags = bytecode::MakeFunctionFlags::empty(); - if have_defaults { - func_flags |= bytecode::MakeFunctionFlags::DEFAULTS; - } - if !kw_with_defaults.is_empty() { - func_flags |= bytecode::MakeFunctionFlags::KW_ONLY_DEFAULTS; - } - self.push_output( bytecode::CodeFlags::NEW_LOCALS | bytecode::CodeFlags::IS_OPTIMIZED, parameters.posonlyargs.len().to_u32(), @@ -1565,7 +1521,7 @@ impl Compiler<'_> { self.varname(name.name.as_str())?; } - Ok(func_flags) + Ok(()) } fn 
prepare_decorators(&mut self, decorator_list: &[Decorator]) -> CompileResult<()> { @@ -1869,7 +1825,57 @@ impl Compiler<'_> { self.push_symbol_table(); } - let mut func_flags = self.enter_function(name, parameters)?; + // Prepare defaults and kwdefaults before entering function + let defaults: Vec<_> = std::iter::empty() + .chain(¶meters.posonlyargs) + .chain(¶meters.args) + .filter_map(|x| x.default.as_deref()) + .collect(); + let have_defaults = !defaults.is_empty(); + + // Compile defaults before entering function scope + if have_defaults { + // Construct a tuple: + let size = defaults.len().to_u32(); + for element in &defaults { + self.compile_expression(element)?; + } + emit!(self, Instruction::BuildTuple { size }); + } + + // Prepare keyword-only defaults + let mut kw_with_defaults = vec![]; + for kwonlyarg in ¶meters.kwonlyargs { + if let Some(default) = &kwonlyarg.default { + kw_with_defaults.push((&kwonlyarg.parameter, default)); + } + } + + let have_kwdefaults = !kw_with_defaults.is_empty(); + if have_kwdefaults { + let default_kw_count = kw_with_defaults.len(); + for (arg, default) in kw_with_defaults.iter() { + self.emit_load_const(ConstantData::Str { + value: arg.name.as_str().into(), + }); + self.compile_expression(default)?; + } + emit!( + self, + Instruction::BuildMap { + size: default_kw_count.to_u32(), + } + ); + } + + self.enter_function(name, parameters)?; + let mut func_flags = bytecode::MakeFunctionFlags::empty(); + if have_defaults { + func_flags |= bytecode::MakeFunctionFlags::DEFAULTS; + } + if have_kwdefaults { + func_flags |= bytecode::MakeFunctionFlags::KW_ONLY_DEFAULTS; + } self.current_code_info() .flags .set(bytecode::CodeFlags::IS_COROUTINE, is_async); @@ -1888,7 +1894,7 @@ impl Compiler<'_> { }; // Set qualname using the new method - let qualname = self.set_qualname(); + self.set_qualname(); let (doc_str, body) = split_doc(body, &self.opts); @@ -1965,7 +1971,7 @@ impl Compiler<'_> { } // Create function with closure - 
self.make_closure(code, &qualname, func_flags)?; + self.make_closure(code, func_flags)?; if let Some(value) = doc_str { emit!(self, Instruction::Duplicate); @@ -1982,58 +1988,92 @@ impl Compiler<'_> { self.store_name(name) } + /// Determines if a variable should be CELL or FREE type + // = get_ref_type + fn get_ref_type(&self, name: &str) -> Result { + // Special handling for __class__ and __classdict__ in class scope + if self.ctx.in_class && (name == "__class__" || name == "__classdict__") { + return Ok(SymbolScope::Cell); + } + + let table = self.symbol_table_stack.last().unwrap(); + match table.lookup(name) { + Some(symbol) => match symbol.scope { + SymbolScope::Cell | SymbolScope::TypeParams => Ok(SymbolScope::Cell), + SymbolScope::Free => Ok(SymbolScope::Free), + _ if symbol.flags.contains(SymbolFlags::FREE_CLASS) => Ok(SymbolScope::Free), + _ => Err(CodegenErrorType::SyntaxError(format!( + "get_ref_type: invalid scope for '{name}'" + ))), + }, + None => Err(CodegenErrorType::SyntaxError(format!( + "get_ref_type: cannot find symbol '{name}'" + ))), + } + } + /// Loads closure variables if needed and creates a function object // = compiler_make_closure fn make_closure( &mut self, code: CodeObject, - qualname: &str, - mut flags: bytecode::MakeFunctionFlags, + flags: bytecode::MakeFunctionFlags, ) -> CompileResult<()> { // Handle free variables (closure) - if !code.freevars.is_empty() { + let has_freevars = !code.freevars.is_empty(); + if has_freevars { // Build closure tuple by loading free variables + for var in &code.freevars { - let table = self.symbol_table_stack.last().unwrap(); - let symbol = match table.lookup(var) { - Some(s) => s, - None => { - return Err(self.error(CodegenErrorType::SyntaxError(format!( - "compiler_make_closure: cannot find symbol '{var}'", - )))); - } - }; + // Special case: If a class contains a method with a + // free variable that has the same name as a method, + // the name will be considered free *and* local in the + // class. 
It should be handled by the closure, as + // well as by the normal name lookup logic. + + // Get reference type using our get_ref_type function + let ref_type = self.get_ref_type(var).map_err(|e| self.error(e))?; + // Get parent code info let parent_code = self.code_stack.last().unwrap(); - let vars = match symbol.scope { - SymbolScope::Free => &parent_code.metadata.freevars, - SymbolScope::Cell => &parent_code.metadata.cellvars, - SymbolScope::TypeParams => &parent_code.metadata.cellvars, - _ if symbol.flags.contains(SymbolFlags::FREE_CLASS) => { - &parent_code.metadata.freevars - } + let cellvars_len = parent_code.metadata.cellvars.len(); + + // Look up the variable index based on reference type + let idx = match ref_type { + SymbolScope::Cell => parent_code + .metadata + .cellvars + .get_index_of(var) + .or_else(|| { + parent_code + .metadata + .freevars + .get_index_of(var) + .map(|i| i + cellvars_len) + }) + .ok_or_else(|| { + self.error(CodegenErrorType::SyntaxError(format!( + "compiler_make_closure: cannot find '{var}' in parent vars", + ))) + })?, + SymbolScope::Free => parent_code + .metadata + .freevars + .get_index_of(var) + .map(|i| i + cellvars_len) + .or_else(|| parent_code.metadata.cellvars.get_index_of(var)) + .ok_or_else(|| { + self.error(CodegenErrorType::SyntaxError(format!( + "compiler_make_closure: cannot find '{var}' in parent vars", + ))) + })?, _ => { return Err(self.error(CodegenErrorType::SyntaxError(format!( - "compiler_make_closure: invalid scope for '{var}'", + "compiler_make_closure: unexpected ref_type {ref_type:?} for '{var}'", )))); } }; - let idx = match vars.get_index_of(var) { - Some(i) => i, - None => { - return Err(self.error(CodegenErrorType::SyntaxError(format!( - "compiler_make_closure: cannot find '{var}' in parent vars", - )))); - } - }; - - let idx = if let SymbolScope::Free = symbol.scope { - idx + parent_code.metadata.cellvars.len() - } else { - idx - }; - emit!(self, Instruction::LoadClosure(idx.to_u32())); } @@ 
-2044,22 +2084,73 @@ impl Compiler<'_> { size: code.freevars.len().to_u32(), } ); - - flags |= bytecode::MakeFunctionFlags::CLOSURE; } - // Load code object + // load code object and create function self.emit_load_const(ConstantData::Code { code: Box::new(code), }); - // Load qualified name - self.emit_load_const(ConstantData::Str { - value: qualname.into(), - }); + // Create function with no flags + emit!(self, Instruction::MakeFunction); + + // Now set attributes one by one using SET_FUNCTION_ATTRIBUTE + // Note: The order matters! Values must be on stack before calling SET_FUNCTION_ATTRIBUTE + + // Set closure if needed + if has_freevars { + // Closure tuple is already on stack + emit!( + self, + Instruction::SetFunctionAttribute { + attr: bytecode::MakeFunctionFlags::CLOSURE + } + ); + } + + // Set annotations if present + if flags.contains(bytecode::MakeFunctionFlags::ANNOTATIONS) { + // Annotations dict is already on stack + emit!( + self, + Instruction::SetFunctionAttribute { + attr: bytecode::MakeFunctionFlags::ANNOTATIONS + } + ); + } - // Make function with proper flags - emit!(self, Instruction::MakeFunction(flags)); + // Set kwdefaults if present + if flags.contains(bytecode::MakeFunctionFlags::KW_ONLY_DEFAULTS) { + // kwdefaults dict is already on stack + emit!( + self, + Instruction::SetFunctionAttribute { + attr: bytecode::MakeFunctionFlags::KW_ONLY_DEFAULTS + } + ); + } + + // Set defaults if present + if flags.contains(bytecode::MakeFunctionFlags::DEFAULTS) { + // defaults tuple is already on stack + emit!( + self, + Instruction::SetFunctionAttribute { + attr: bytecode::MakeFunctionFlags::DEFAULTS + } + ); + } + + // Set type_params if present + if flags.contains(bytecode::MakeFunctionFlags::TYPE_PARAMS) { + // type_params tuple is already on stack + emit!( + self, + Instruction::SetFunctionAttribute { + attr: bytecode::MakeFunctionFlags::TYPE_PARAMS + } + ); + } Ok(()) } @@ -2262,7 +2353,7 @@ impl Compiler<'_> { func_flags |= 
bytecode::MakeFunctionFlags::TYPE_PARAMS; // Create class function with closure - self.make_closure(class_code, name, func_flags)?; + self.make_closure(class_code, func_flags)?; self.emit_load_const(ConstantData::Str { value: name.into() }); // Compile original bases @@ -2311,19 +2402,14 @@ impl Compiler<'_> { let type_params_code = self.exit_scope(); // Execute the type params function - let type_params_name = format!(""); - self.make_closure( - type_params_code, - &type_params_name, - bytecode::MakeFunctionFlags::empty(), - )?; + self.make_closure(type_params_code, bytecode::MakeFunctionFlags::empty())?; emit!(self, Instruction::CallFunctionPositional { nargs: 0 }); } else { // Non-generic class: standard path emit!(self, Instruction::LoadBuildClass); // Create class function with closure - self.make_closure(class_code, name, bytecode::MakeFunctionFlags::empty())?; + self.make_closure(class_code, bytecode::MakeFunctionFlags::empty())?; self.emit_load_const(ConstantData::Str { value: name.into() }); let call = if let Some(arguments) = arguments { @@ -4033,10 +4119,59 @@ impl Compiler<'_> { parameters, body, .. 
}) => { let prev_ctx = self.ctx; - let name = "".to_owned(); - let func_flags = self - .enter_function(&name, parameters.as_deref().unwrap_or(&Default::default()))?; + let default_params = Default::default(); + let params = parameters.as_deref().unwrap_or(&default_params); + + // Prepare defaults before entering function + let defaults: Vec<_> = std::iter::empty() + .chain(¶ms.posonlyargs) + .chain(¶ms.args) + .filter_map(|x| x.default.as_deref()) + .collect(); + let have_defaults = !defaults.is_empty(); + + if have_defaults { + let size = defaults.len().to_u32(); + for element in &defaults { + self.compile_expression(element)?; + } + emit!(self, Instruction::BuildTuple { size }); + } + + // Prepare keyword-only defaults + let mut kw_with_defaults = vec![]; + for kwonlyarg in ¶ms.kwonlyargs { + if let Some(default) = &kwonlyarg.default { + kw_with_defaults.push((&kwonlyarg.parameter, default)); + } + } + + let have_kwdefaults = !kw_with_defaults.is_empty(); + if have_kwdefaults { + let default_kw_count = kw_with_defaults.len(); + for (arg, default) in kw_with_defaults.iter() { + self.emit_load_const(ConstantData::Str { + value: arg.name.as_str().into(), + }); + self.compile_expression(default)?; + } + emit!( + self, + Instruction::BuildMap { + size: default_kw_count.to_u32(), + } + ); + } + + self.enter_function(&name, params)?; + let mut func_flags = bytecode::MakeFunctionFlags::empty(); + if have_defaults { + func_flags |= bytecode::MakeFunctionFlags::DEFAULTS; + } + if have_kwdefaults { + func_flags |= bytecode::MakeFunctionFlags::KW_ONLY_DEFAULTS; + } // Set qualname for lambda self.set_qualname(); @@ -4057,7 +4192,7 @@ impl Compiler<'_> { let code = self.exit_scope(); // Create lambda function with closure - self.make_closure(code, &name, func_flags)?; + self.make_closure(code, func_flags)?; self.ctx = prev_ctx; } @@ -4602,7 +4737,7 @@ impl Compiler<'_> { self.ctx = prev_ctx; // Create comprehension function with closure - self.make_closure(code, name, 
bytecode::MakeFunctionFlags::empty())?; + self.make_closure(code, bytecode::MakeFunctionFlags::empty())?; // Evaluate iterated item: self.compile_expression(&generators[0].iter)?; diff --git a/compiler/codegen/src/symboltable.rs b/compiler/codegen/src/symboltable.rs index 16a65bca11..7f7355bd73 100644 --- a/compiler/codegen/src/symboltable.rs +++ b/compiler/codegen/src/symboltable.rs @@ -127,6 +127,7 @@ pub enum SymbolScope { GlobalImplicit, Free, Cell, + // TODO: wrong place. not a symbol scope, but a COMPILER_SCOPE_TYPEPARAMS TypeParams, } diff --git a/compiler/core/src/bytecode.rs b/compiler/core/src/bytecode.rs index 3e74fe6273..0a6f3bf20d 100644 --- a/compiler/core/src/bytecode.rs +++ b/compiler/core/src/bytecode.rs @@ -528,7 +528,10 @@ pub enum Instruction { JumpIfFalseOrPop { target: Arg

+ ''""" + s = f'