Skip to content

Commit bbb192d

Browse files
authored
More improvements to stubgen (python#7951)
This includes many improvements to stubgen and a related mypy fix. Here are the most useful ones:

* Use a separate process to do introspection of modules so that we can recover if a module kills the current process on import, for example.
* Export all names imported from the current package by default. Add `--export-less` stubgen flag to disable this behavior.
* Avoid a crash in semantic analysis if there's a bad property definition (stubgen can generate these).
* Fix various issues with bad Python code being generated by stubgen.
* Ignore bad signatures in docstrings (this is still very ad-hoc, but it's a bit more robust now).
* Try to find a module using `sys.path` if we can't import it.
* Skip some additional modules that may be runnable since they can cause trouble when we try to introspect them.

This is again a big PR, but the commit history should be reasonably clean.
1 parent 384f32c commit bbb192d

File tree

9 files changed

+1023
-224
lines changed

9 files changed

+1023
-224
lines changed

mypy/moduleinspect.py

Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
"""Basic introspection of modules."""
2+
3+
from typing import List, Optional, Union
4+
from types import ModuleType
5+
from multiprocessing import Process, Queue
6+
import importlib
7+
import inspect
8+
import os
9+
import pkgutil
10+
import queue
11+
import sys
12+
13+
14+
class ModuleProperties:
15+
def __init__(self,
16+
name: str,
17+
file: Optional[str],
18+
path: Optional[List[str]],
19+
all: Optional[List[str]],
20+
is_c_module: bool,
21+
subpackages: List[str]) -> None:
22+
self.name = name # __name__ attribute
23+
self.file = file # __file__ attribute
24+
self.path = path # __path__ attribute
25+
self.all = all # __all__ attribute
26+
self.is_c_module = is_c_module
27+
self.subpackages = subpackages
28+
29+
30+
def is_c_module(module: ModuleType) -> bool:
31+
if module.__dict__.get('__file__') is None:
32+
# Could be a namespace package. These must be handled through
33+
# introspection, since there is no source file.
34+
return True
35+
return os.path.splitext(module.__dict__['__file__'])[-1] in ['.so', '.pyd']
36+
37+
38+
class InspectError(Exception):
39+
pass
40+
41+
42+
def get_package_properties(package_id: str) -> ModuleProperties:
43+
"""Use runtime introspection to get information about a module/package."""
44+
try:
45+
package = importlib.import_module(package_id)
46+
except BaseException as e:
47+
raise InspectError(str(e))
48+
name = getattr(package, '__name__', None)
49+
file = getattr(package, '__file__', None)
50+
path = getattr(package, '__path__', None) # type: Optional[List[str]]
51+
if not isinstance(path, list):
52+
path = None
53+
pkg_all = getattr(package, '__all__', None)
54+
if pkg_all is not None:
55+
try:
56+
pkg_all = list(pkg_all)
57+
except Exception:
58+
pkg_all = None
59+
is_c = is_c_module(package)
60+
61+
if path is None:
62+
# Object has no path; this means it's either a module inside a package
63+
# (and thus no sub-packages), or it could be a C extension package.
64+
if is_c:
65+
# This is a C extension module, now get the list of all sub-packages
66+
# using the inspect module
67+
subpackages = [package.__name__ + "." + name
68+
for name, val in inspect.getmembers(package)
69+
if inspect.ismodule(val)
70+
and val.__name__ == package.__name__ + "." + name]
71+
else:
72+
# It's a module inside a package. There's nothing else to walk/yield.
73+
subpackages = []
74+
else:
75+
all_packages = pkgutil.walk_packages(path, prefix=package.__name__ + ".",
76+
onerror=lambda r: None)
77+
subpackages = [qualified_name for importer, qualified_name, ispkg in all_packages]
78+
return ModuleProperties(name=name,
79+
file=file,
80+
path=path,
81+
all=pkg_all,
82+
is_c_module=is_c,
83+
subpackages=subpackages)
84+
85+
86+
def worker(tasks: 'Queue[str]',
87+
results: 'Queue[Union[str, ModuleProperties]]',
88+
sys_path: List[str]) -> None:
89+
"""The main loop of a worker introspection process."""
90+
sys.path = sys_path
91+
while True:
92+
mod = tasks.get()
93+
try:
94+
prop = get_package_properties(mod)
95+
except InspectError as e:
96+
results.put(str(e))
97+
continue
98+
results.put(prop)
99+
100+
101+
class ModuleInspect:
102+
"""Perform runtime introspection of modules in a separate process.
103+
104+
Reuse the process for multiple modules for efficiency. However, if there is an
105+
error, retry using a fresh process to avoid cross-contamination of state between
106+
modules.
107+
108+
We use a separate process to isolate us from many side effects. For example, the
109+
import of a module may kill the current process, and we want to recover from that.
110+
111+
Always use in a with statement for proper clean-up:
112+
113+
with ModuleInspect() as m:
114+
p = m.get_package_properties('urllib.parse')
115+
"""
116+
117+
def __init__(self) -> None:
118+
self._start()
119+
120+
def _start(self) -> None:
121+
self.tasks = Queue() # type: Queue[str]
122+
self.results = Queue() # type: Queue[Union[ModuleProperties, str]]
123+
self.proc = Process(target=worker, args=(self.tasks, self.results, sys.path))
124+
self.proc.start()
125+
self.counter = 0 # Number of successful roundtrips
126+
127+
def close(self) -> None:
128+
"""Free any resources used."""
129+
self.proc.terminate()
130+
131+
def get_package_properties(self, package_id: str) -> ModuleProperties:
132+
"""Return some properties of a module/package using runtime introspection.
133+
134+
Raise InspectError if the target couldn't be imported.
135+
"""
136+
self.tasks.put(package_id)
137+
res = self._get_from_queue()
138+
if res is None:
139+
# The process died; recover and report error.
140+
self._start()
141+
raise InspectError('Process died when importing %r' % package_id)
142+
if isinstance(res, str):
143+
# Error importing module
144+
if self.counter > 0:
145+
# Also try with a fresh process. Maybe one of the previous imports has
146+
# corrupted some global state.
147+
self.close()
148+
self._start()
149+
return self.get_package_properties(package_id)
150+
raise InspectError(res)
151+
self.counter += 1
152+
return res
153+
154+
def _get_from_queue(self) -> Union[ModuleProperties, str, None]:
155+
"""Get value from the queue.
156+
157+
Return the value read from the queue, or None if the process unexpectedly died.
158+
"""
159+
max_iter = 100
160+
n = 0
161+
while True:
162+
if n == max_iter:
163+
raise RuntimeError('Timeout waiting for subprocess')
164+
try:
165+
return self.results.get(timeout=0.05)
166+
except queue.Empty:
167+
if not self.proc.is_alive():
168+
return None
169+
n += 1
170+
171+
def __enter__(self) -> 'ModuleInspect':
172+
return self
173+
174+
def __exit__(self, *args: object) -> None:
175+
self.close()

mypy/semanal.py

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -865,19 +865,26 @@ def analyze_property_with_multi_part_definition(self, defn: OverloadedFuncDef) -
865865
defn.is_property = True
866866
items = defn.items
867867
first_item = cast(Decorator, defn.items[0])
868-
for item in items[1:]:
869-
if isinstance(item, Decorator) and len(item.decorators) == 1:
870-
node = item.decorators[0]
871-
if isinstance(node, MemberExpr):
872-
if node.name == 'setter':
873-
# The first item represents the entire property.
874-
first_item.var.is_settable_property = True
875-
# Get abstractness from the original definition.
876-
item.func.is_abstract = first_item.func.is_abstract
877-
else:
878-
self.fail("Decorated property not supported", item)
868+
deleted_items = []
869+
for i, item in enumerate(items[1:]):
879870
if isinstance(item, Decorator):
871+
if len(item.decorators) == 1:
872+
node = item.decorators[0]
873+
if isinstance(node, MemberExpr):
874+
if node.name == 'setter':
875+
# The first item represents the entire property.
876+
first_item.var.is_settable_property = True
877+
# Get abstractness from the original definition.
878+
item.func.is_abstract = first_item.func.is_abstract
879+
else:
880+
self.fail("Decorated property not supported", item)
880881
item.func.accept(self)
882+
else:
883+
self.fail('Unexpected definition for property "{}"'.format(first_item.func.name),
884+
item)
885+
deleted_items.append(i + 1)
886+
for i in reversed(deleted_items):
887+
del items[i]
881888

882889
def add_function_to_symbol_table(self, func: Union[FuncDef, OverloadedFuncDef]) -> None:
883890
if self.is_class_scope():

mypy/stubdoc.py

Lines changed: 31 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
"""
66
import re
77
import io
8-
import sys
98
import contextlib
109
import tokenize
1110

@@ -18,14 +17,25 @@
1817
Sig = Tuple[str, str]
1918

2019

20+
_TYPE_RE = re.compile(r'^[a-zA-Z_][\w\[\], ]*(\.[a-zA-Z_][\w\[\], ]*)*$') # type: Final
21+
_ARG_NAME_RE = re.compile(r'\**[A-Za-z_][A-Za-z0-9_]*$') # type: Final
22+
23+
24+
def is_valid_type(s: str) -> bool:
25+
"""Try to determine whether a string might be a valid type annotation."""
26+
if s in ('True', 'False', 'retval'):
27+
return False
28+
if ',' in s and '[' not in s:
29+
return False
30+
return _TYPE_RE.match(s) is not None
31+
32+
2133
class ArgSig:
2234
"""Signature info for a single argument."""
2335

24-
_TYPE_RE = re.compile(r'^[a-zA-Z_][\w\[\], ]*(\.[a-zA-Z_][\w\[\], ]*)*$') # type: Final
25-
2636
def __init__(self, name: str, type: Optional[str] = None, default: bool = False):
2737
self.name = name
28-
if type and not self._TYPE_RE.match(type):
38+
if type and not is_valid_type(type):
2939
raise ValueError("Invalid type: " + type)
3040
self.type = type
3141
# Does this argument have a default value?
@@ -60,7 +70,8 @@ def __eq__(self, other: Any) -> bool:
6070

6171

6272
class DocStringParser:
63-
"""Parse function signstures in documentation."""
73+
"""Parse function signatures in documentation."""
74+
6475
def __init__(self, function_name: str) -> None:
6576
# Only search for signatures of function with this name.
6677
self.function_name = function_name
@@ -76,7 +87,7 @@ def __init__(self, function_name: str) -> None:
7687
self.signatures = [] # type: List[FunctionSig]
7788

7889
def add_token(self, token: tokenize.TokenInfo) -> None:
79-
"""Process next token fro the token stream."""
90+
"""Process next token from the token stream."""
8091
if (token.type == tokenize.NAME and token.string == self.function_name and
8192
self.state[-1] == STATE_INIT):
8293
self.state.append(STATE_FUNCTION_NAME)
@@ -129,6 +140,10 @@ def add_token(self, token: tokenize.TokenInfo) -> None:
129140
self.state.pop()
130141
elif self.state[-1] == STATE_ARGUMENT_LIST:
131142
self.arg_name = self.accumulator
143+
if not _ARG_NAME_RE.match(self.arg_name):
144+
# Invalid argument name.
145+
self.reset()
146+
return
132147

133148
if token.string == ')':
134149
self.state.pop()
@@ -152,6 +167,9 @@ def add_token(self, token: tokenize.TokenInfo) -> None:
152167
elif (token.type in (tokenize.NEWLINE, tokenize.ENDMARKER) and
153168
self.state[-1] in (STATE_INIT, STATE_RETURN_VALUE)):
154169
if self.state[-1] == STATE_RETURN_VALUE:
170+
if not is_valid_type(self.accumulator):
171+
self.reset()
172+
return
155173
self.ret_type = self.accumulator
156174
self.accumulator = ""
157175
self.state.pop()
@@ -166,6 +184,12 @@ def add_token(self, token: tokenize.TokenInfo) -> None:
166184
else:
167185
self.accumulator += token.string
168186

187+
def reset(self) -> None:
188+
self.state = [STATE_INIT]
189+
self.args = []
190+
self.found = False
191+
self.accumulator = ""
192+
169193
def get_signatures(self) -> List[FunctionSig]:
170194
"""Return sorted copy of the list of signatures found so far."""
171195
def has_arg(name: str, signature: FunctionSig) -> bool:
@@ -211,13 +235,7 @@ def is_unique_args(sig: FunctionSig) -> bool:
211235
"""return true if function argument names are unique"""
212236
return len(sig.args) == len(set((arg.name for arg in sig.args)))
213237

214-
# Warn about invalid signatures
215-
invalid_sigs = [sig for sig in sigs if not is_unique_args(sig)]
216-
if invalid_sigs:
217-
print("Warning: Invalid signatures found:", file=sys.stderr)
218-
print("\n".join(str(sig) for sig in invalid_sigs), file=sys.stderr)
219-
220-
# return only signatures, that have unique argument names. mypy fails on non-uqniue arg names
238+
# Return only signatures that have unique argument names. Mypy fails on non-unique arg names.
221239
return [sig for sig in sigs if is_unique_args(sig)]
222240

223241

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy