Skip to content

Try simple-minded call expression cache #19505

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jul 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions mypy/binder.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,10 @@ def __init__(self, options: Options) -> None:
# flexible inference of variable types (--allow-redefinition-new).
self.bind_all = options.allow_redefinition_new

# This tracks any externally visible changes in binder to invalidate
# expression caches when needed.
self.version = 0

def _get_id(self) -> int:
# Increment the counter first, so the value returned is the freshly bumped one.
self.next_id += 1
return self.next_id
Expand All @@ -158,6 +162,7 @@ def push_frame(self, conditional_frame: bool = False) -> Frame:
return f

def _put(self, key: Key, type: Type, from_assignment: bool, index: int = -1) -> None:
# Record `type` for `key` in the frame at `index` (the last frame by default).
# Each write bumps `version`, which consumers compare to detect that the
# binder state changed since they last looked (e.g. to invalidate caches).
self.version += 1
self.frames[index].types[key] = CurrentType(type, from_assignment)

def _get(self, key: Key, index: int = -1) -> CurrentType | None:
Expand Down Expand Up @@ -185,6 +190,7 @@ def put(self, expr: Expression, typ: Type, *, from_assignment: bool = True) -> N
self._put(key, typ, from_assignment)

def unreachable(self) -> None:
# Mark the last frame as unreachable. This counts as a visible binder
# change, so `version` is bumped before the flag is set.
self.version += 1
self.frames[-1].unreachable = True

def suppress_unreachable_warnings(self) -> None:
Expand Down
7 changes: 5 additions & 2 deletions mypy/checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -448,7 +448,6 @@ def reset(self) -> None:
self.binder = ConditionalTypeBinder(self.options)
self._type_maps[1:] = []
self._type_maps[0].clear()
self.temp_type_map = None
self.expr_checker.reset()
self.deferred_nodes = []
self.partial_types = []
Expand Down Expand Up @@ -3019,6 +3018,8 @@ def visit_block(self, b: Block) -> None:
break
else:
self.accept(s)
# Clear expression cache after each statement to avoid unlimited growth.
self.expr_checker.expr_cache.clear()

def should_report_unreachable_issues(self) -> bool:
return (
Expand Down Expand Up @@ -4000,7 +4001,7 @@ def check_multi_assignment_from_union(
for t, lv in zip(transposed, self.flatten_lvalues(lvalues)):
# We can access _type_maps directly since temporary type maps are
# only created within expressions.
t.append(self._type_maps[0].pop(lv, AnyType(TypeOfAny.special_form)))
t.append(self._type_maps[-1].pop(lv, AnyType(TypeOfAny.special_form)))
union_types = tuple(make_simplified_union(col) for col in transposed)
for expr, items in assignments.items():
# Bind a union of types collected in 'assignments' to every expression.
Expand Down Expand Up @@ -4659,6 +4660,8 @@ def replace_partial_type(
) -> None:
"""Replace the partial type of var with a non-partial type."""
var.type = new_type
# Updating a partial type should invalidate expression caches.
self.binder.version += 1
del partial_types[var]
if self.options.allow_redefinition_new:
# When using --allow-redefinition-new, binder tracks all types of
Expand Down
45 changes: 44 additions & 1 deletion mypy/checkexpr.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from mypy.checkmember import analyze_member_access, has_operator
from mypy.checkstrformat import StringFormatterChecker
from mypy.erasetype import erase_type, remove_instance_last_known_values, replace_meta_vars
from mypy.errors import ErrorWatcher, report_internal_error
from mypy.errors import ErrorInfo, ErrorWatcher, report_internal_error
from mypy.expandtype import (
expand_type,
expand_type_by_instance,
Expand Down Expand Up @@ -355,9 +355,15 @@ def __init__(
type_state.infer_polymorphic = not self.chk.options.old_type_inference

self._arg_infer_context_cache = None
self.expr_cache: dict[
tuple[Expression, Type | None],
tuple[int, Type, list[ErrorInfo], dict[Expression, Type]],
] = {}
self.in_lambda_expr = False

def reset(self) -> None:
# Replace the resolved-type mapping with an empty one and discard every
# cached expression result.
self.resolved_type = {}
self.expr_cache.clear()

def visit_name_expr(self, e: NameExpr) -> Type:
"""Type check a name expression.
Expand Down Expand Up @@ -5404,6 +5410,8 @@ def find_typeddict_context(

def visit_lambda_expr(self, e: LambdaExpr) -> Type:
"""Type check lambda expression."""
old_in_lambda = self.in_lambda_expr
self.in_lambda_expr = True
self.chk.check_default_args(e, body_is_trivial=False)
inferred_type, type_override = self.infer_lambda_type_using_context(e)
if not inferred_type:
Expand All @@ -5424,6 +5432,7 @@ def visit_lambda_expr(self, e: LambdaExpr) -> Type:
ret_type = self.accept(e.expr(), allow_none_return=True)
fallback = self.named_type("builtins.function")
self.chk.return_types.pop()
self.in_lambda_expr = old_in_lambda
return callable_type(e, fallback, ret_type)
else:
# Type context available.
Expand All @@ -5436,6 +5445,7 @@ def visit_lambda_expr(self, e: LambdaExpr) -> Type:
self.accept(e.expr(), allow_none_return=True)
ret_type = self.chk.lookup_type(e.expr())
self.chk.return_types.pop()
self.in_lambda_expr = old_in_lambda
return replace_callable_return_type(inferred_type, ret_type)

def infer_lambda_type_using_context(
Expand Down Expand Up @@ -5980,6 +5990,24 @@ def accept(
typ = self.visit_conditional_expr(node, allow_none_return=True)
elif allow_none_return and isinstance(node, AwaitExpr):
typ = self.visit_await_expr(node, allow_none_return=True)
# Deeply nested generic calls can deteriorate performance dramatically.
# Although in most cases caching makes little difference, in worst case
# it avoids exponential complexity.
# We cannot use cache inside lambdas, because they skip immediate type
# context, and use enclosing one, see infer_lambda_type_using_context().
# TODO: consider using cache for more expression kinds.
elif isinstance(node, (CallExpr, ListExpr, TupleExpr)) and not (
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it be difficult to allow dicts and sets here? Inline dictionaries are relatively common and even heavier than lists, and sets just for consistency.

Also operator exprs can be really heavy (#14978) and are fundamentally similar to CallExpr, are they worth considering?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The problem with dicts/sets is that I see around 0.3% regression on self-check when I add them (but maybe this is just noise). My reasoning is that most code has a bunch of shallow dictionaries, and for those caching is just busy-work that will never be used (note caching is not free, since mypyc is slow on creating local type maps and watchers).

Anyway, I am open to considering more expression kinds to cache, but let's put those in separate PR(s).

self.in_lambda_expr or self.chk.current_node_deferred
):
if (node, type_context) in self.expr_cache:
binder_version, typ, messages, type_map = self.expr_cache[(node, type_context)]
if binder_version == self.chk.binder.version:
self.chk.store_types(type_map)
self.msg.add_errors(messages)
else:
typ = self.accept_maybe_cache(node, type_context=type_context)
else:
typ = self.accept_maybe_cache(node, type_context=type_context)
else:
typ = node.accept(self)
except Exception as err:
Expand Down Expand Up @@ -6010,6 +6038,21 @@ def accept(
self.in_expression = False
return result

def accept_maybe_cache(self, node: Expression, type_context: Type | None = None) -> Type:
# Type check `node`, capturing the types and errors it produces, and store
# the result in `expr_cache` under the key (node, type_context) when it is
# safe to do so. Returns the type produced by `node.accept(self)`.
#
# Snapshot the binder version so we can tell whether checking this node
# changed binder state.
binder_version = self.chk.binder.version
# Micro-optimization: inline local_type_map() as it is somewhat slow in mypyc.
type_map: dict[Expression, Type] = {}
# Push a fresh map onto the checker's type-map stack; it is popped below
# and its contents are then passed to store_types().
self.chk._type_maps.append(type_map)
# Filter errors while visiting, but keep the filtered ones so they can be
# cached and re-emitted afterwards.
with self.msg.filter_errors(filter_errors=True, save_filtered_errors=True) as msg:
typ = node.accept(self)
messages = msg.filtered_errors()
# Only cache when the binder version is unchanged by the visit and the
# current node has not been deferred.
if binder_version == self.chk.binder.version and not self.chk.current_node_deferred:
self.expr_cache[(node, type_context)] = (binder_version, typ, messages, type_map)
# Whether or not the result was cached, pop the local map, hand the
# collected types to the checker and report the captured errors.
self.chk._type_maps.pop()
self.chk.store_types(type_map)
self.msg.add_errors(messages)
return typ

def named_type(self, name: str) -> Instance:
"""Return an instance type with type given by the name and no type
arguments. Alias for TypeChecker.named_type.
Expand Down
6 changes: 4 additions & 2 deletions mypy/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,7 +390,7 @@ class Errors:
# in some cases to avoid reporting huge numbers of errors.
seen_import_error = False

_watchers: list[ErrorWatcher] = []
_watchers: list[ErrorWatcher]

def __init__(
self,
Expand Down Expand Up @@ -421,6 +421,7 @@ def initialize(self) -> None:
self.scope = None
self.target_module = None
self.seen_import_error = False
self._watchers = []

def reset(self) -> None:
self.initialize()
Expand Down Expand Up @@ -931,7 +932,8 @@ def prefer_simple_messages(self) -> bool:
if self.file in self.ignored_files:
# Errors ignored, so no point generating fancy messages
return True
for _watcher in self._watchers:
if self._watchers:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you explain this change? Watchers used to be additive and that sounded reasonable to me...

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Previously, if any of the active watchers was ignoring errors, we could use simpler messages, but in the presence of caching this is not valid anymore. For example, we can accept an expression when there is an enclosing ignoring watcher, but then the caching watcher will record a simple message, and if next time we, by chance, accept the same expression in the same type context, but without the ignoring watcher, an incorrect (i.e. way too terse) error message will be pulled from the cache.

Without this change, 6 tests fail because terse/simplistic error messages are used.

_watcher = self._watchers[-1]
if _watcher._filter is True and _watcher._filtered is None:
# Errors are filtered
return True
Expand Down
23 changes: 23 additions & 0 deletions test-data/unit/check-overloading.test
Original file line number Diff line number Diff line change
Expand Up @@ -6801,3 +6801,26 @@ class D(Generic[T]):
a: D[str] # E: Type argument "str" of "D" must be a subtype of "C"
reveal_type(a.f(1)) # N: Revealed type is "builtins.int"
reveal_type(a.f("x")) # N: Revealed type is "builtins.str"

[case testMultiAssignFromUnionInOverloadCached]
from typing import Iterable, overload, Union, Optional

@overload
def always_bytes(str_or_bytes: None) -> None: ...
@overload
def always_bytes(str_or_bytes: Union[str, bytes]) -> bytes: ...
def always_bytes(str_or_bytes: Union[None, str, bytes]) -> Optional[bytes]:
pass

class Headers:
def __init__(self, iter: Iterable[tuple[bytes, bytes]]) -> None: ...

headers: Union[Headers, dict[Union[str, bytes], Union[str, bytes]], Iterable[tuple[bytes, bytes]]]

if isinstance(headers, dict):
headers = Headers(
(always_bytes(k), always_bytes(v)) for k, v in headers.items()
)

reveal_type(headers) # N: Revealed type is "Union[__main__.Headers, typing.Iterable[tuple[builtins.bytes, builtins.bytes]]]"
[builtins fixtures/isinstancelist.pyi]
2 changes: 2 additions & 0 deletions test-data/unit/fixtures/isinstancelist.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ class bool(int): pass
class str:
def __add__(self, x: str) -> str: pass
def __getitem__(self, x: int) -> str: pass
class bytes: pass

T = TypeVar('T')
KT = TypeVar('KT')
Expand All @@ -52,6 +53,7 @@ class dict(Mapping[KT, VT]):
def __setitem__(self, k: KT, v: VT) -> None: pass
def __iter__(self) -> Iterator[KT]: pass
def update(self, a: Mapping[KT, VT]) -> None: pass
def items(self) -> Iterable[Tuple[KT, VT]]: pass

class set(Generic[T]):
def __iter__(self) -> Iterator[T]: pass
Expand Down
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy