From 2b07695a2e488ac152b6f04ebb8bfa7693b9ad3e Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 14 Mar 2024 14:30:54 +0100 Subject: [PATCH] gh-113317, AC: Add libclinic.block_parser module * Move Block and BlockParser classes to a new libclinic.block_parser module. * Move Language and PythonLanguage classes to a new libclinic.language module. --- Tools/clinic/clinic.py | 338 +------------------------ Tools/clinic/libclinic/block_parser.py | 256 +++++++++++++++++++ Tools/clinic/libclinic/language.py | 103 ++++++++ 3 files changed, 361 insertions(+), 336 deletions(-) create mode 100644 Tools/clinic/libclinic/block_parser.py create mode 100644 Tools/clinic/libclinic/language.py diff --git a/Tools/clinic/clinic.py b/Tools/clinic/clinic.py index 6488d913168319..ac205866f9d291 100755 --- a/Tools/clinic/clinic.py +++ b/Tools/clinic/clinic.py @@ -6,11 +6,9 @@ # from __future__ import annotations -import abc import argparse import ast import builtins as bltns -import collections import contextlib import dataclasses as dc import enum @@ -57,6 +55,8 @@ ClassDict, ModuleDict, FunctionKind, CALLABLE, STATIC_METHOD, CLASS_METHOD, METHOD_INIT, METHOD_NEW, GETTER, SETTER) +from libclinic.language import Language, PythonLanguage +from libclinic.block_parser import Block, BlockParser # TODO: @@ -144,96 +144,6 @@ def __init__(self) -> None: self.unlock: list[str] = [] -class Language(metaclass=abc.ABCMeta): - - start_line = "" - body_prefix = "" - stop_line = "" - checksum_line = "" - - def __init__(self, filename: str) -> None: - self.filename = filename - - @abc.abstractmethod - def render( - self, - clinic: Clinic, - signatures: Iterable[Module | Class | Function] - ) -> str: - ... - - def parse_line(self, line: str) -> None: - ... - - def validate(self) -> None: - def assert_only_one( - attr: str, - *additional_fields: str - ) -> None: - """ - Ensures that the string found at getattr(self, attr) - contains exactly one formatter replacement string for - each valid field. The list of valid fields is - ['dsl_name'] extended by additional_fields. - - e.g. - self.fmt = "{dsl_name} {a} {b}" - - # this passes - self.assert_only_one('fmt', 'a', 'b') - - # this fails, the format string has a {b} in it - self.assert_only_one('fmt', 'a') - - # this fails, the format string doesn't have a {c} in it - self.assert_only_one('fmt', 'a', 'b', 'c') - - # this fails, the format string has two {a}s in it, - # it must contain exactly one - self.fmt2 = '{dsl_name} {a} {a}' - self.assert_only_one('fmt2', 'a') - - """ - fields = ['dsl_name'] - fields.extend(additional_fields) - line: str = getattr(self, attr) - fcf = libclinic.FormatCounterFormatter() - fcf.format(line) - def local_fail(should_be_there_but_isnt: bool) -> None: - if should_be_there_but_isnt: - fail("{} {} must contain {{{}}} exactly once!".format( - self.__class__.__name__, attr, name)) - else: - fail("{} {} must not contain {{{}}}!".format( - self.__class__.__name__, attr, name)) - - for name, count in fcf.counts.items(): - if name in fields: - if count > 1: - local_fail(True) - else: - local_fail(False) - for name in fields: - if fcf.counts.get(name) != 1: - local_fail(True) - - assert_only_one('start_line') - assert_only_one('stop_line') - - field = "arguments" if "{arguments}" in self.checksum_line else "checksum" - assert_only_one('checksum_line', field) - - - -class PythonLanguage(Language): - - language = 'Python' - start_line = "#/*[{dsl_name} input]" - body_prefix = "#" - stop_line = "#[{dsl_name} start generated code]*/" - checksum_line = "#/*[{dsl_name} end generated code: {arguments}]*/" - - ParamTuple = tuple["Parameter", ...] @@ -1646,250 +1556,6 @@ def render_function( return clinic.get_destination('block').dump() -@dc.dataclass(slots=True, repr=False) -class Block: - r""" - Represents a single block of text embedded in - another file. If dsl_name is None, the block represents - verbatim text, raw original text from the file, in - which case "input" will be the only non-false member. - If dsl_name is not None, the block represents a Clinic - block. - - input is always str, with embedded \n characters. - input represents the original text from the file; - if it's a Clinic block, it is the original text with - the body_prefix and redundant leading whitespace removed. - - dsl_name is either str or None. If str, it's the text - found on the start line of the block between the square - brackets. - - signatures is a list. - It may only contain clinic.Module, clinic.Class, and - clinic.Function objects. At the moment it should - contain at most one of each. - - output is either str or None. If str, it's the output - from this block, with embedded '\n' characters. - - indent is a str. It's the leading whitespace - that was found on every line of input. (If body_prefix is - not empty, this is the indent *after* removing the - body_prefix.) - - "indent" is different from the concept of "preindent" - (which is not stored as state on Block objects). - "preindent" is the whitespace that - was found in front of every line of input *before* the - "body_prefix" (see the Language object). If body_prefix - is empty, preindent must always be empty too. - - To illustrate the difference between "indent" and "preindent": - - Assume that '_' represents whitespace. - If the block processed was in a Python file, and looked like this: - ____#/*[python] - ____#__for a in range(20): - ____#____print(a) - ____#[python]*/ - "preindent" would be "____" and "indent" would be "__". - - """ - input: str - dsl_name: str | None = None - signatures: list[Module | Class | Function] = dc.field(default_factory=list) - output: Any = None # TODO: Very dynamic; probably untypeable in its current form? - indent: str = '' - - def __repr__(self) -> str: - dsl_name = self.dsl_name or "text" - def summarize(s: object) -> str: - s = repr(s) - if len(s) > 30: - return s[:26] + "..." + s[0] - return s - parts = ( - repr(dsl_name), - f"input={summarize(self.input)}", - f"output={summarize(self.output)}" - ) - return f"" - - -class BlockParser: - """ - Block-oriented parser for Argument Clinic. - Iterator, yields Block objects. - """ - - def __init__( - self, - input: str, - language: Language, - *, - verify: bool = True - ) -> None: - """ - "input" should be a str object - with embedded \n characters. - - "language" should be a Language object. - """ - language.validate() - - self.input = collections.deque(reversed(input.splitlines(keepends=True))) - self.block_start_line_number = self.line_number = 0 - - self.language = language - before, _, after = language.start_line.partition('{dsl_name}') - assert _ == '{dsl_name}' - self.find_start_re = libclinic.create_regex(before, after, - whole_line=False) - self.start_re = libclinic.create_regex(before, after) - self.verify = verify - self.last_checksum_re: re.Pattern[str] | None = None - self.last_dsl_name: str | None = None - self.dsl_name: str | None = None - self.first_block = True - - def __iter__(self) -> BlockParser: - return self - - def __next__(self) -> Block: - while True: - if not self.input: - raise StopIteration - - if self.dsl_name: - try: - return_value = self.parse_clinic_block(self.dsl_name) - except ClinicError as exc: - exc.filename = self.language.filename - exc.lineno = self.line_number - raise - self.dsl_name = None - self.first_block = False - return return_value - block = self.parse_verbatim_block() - if self.first_block and not block.input: - continue - self.first_block = False - return block - - - def is_start_line(self, line: str) -> str | None: - match = self.start_re.match(line.lstrip()) - return match.group(1) if match else None - - def _line(self, lookahead: bool = False) -> str: - self.line_number += 1 - line = self.input.pop() - if not lookahead: - self.language.parse_line(line) - return line - - def parse_verbatim_block(self) -> Block: - lines = [] - self.block_start_line_number = self.line_number - - while self.input: - line = self._line() - dsl_name = self.is_start_line(line) - if dsl_name: - self.dsl_name = dsl_name - break - lines.append(line) - - return Block("".join(lines)) - - def parse_clinic_block(self, dsl_name: str) -> Block: - in_lines = [] - self.block_start_line_number = self.line_number + 1 - stop_line = self.language.stop_line.format(dsl_name=dsl_name) - body_prefix = self.language.body_prefix.format(dsl_name=dsl_name) - - def is_stop_line(line: str) -> bool: - # make sure to recognize stop line even if it - # doesn't end with EOL (it could be the very end of the file) - if line.startswith(stop_line): - remainder = line.removeprefix(stop_line) - if remainder and not remainder.isspace(): - fail(f"Garbage after stop line: {remainder!r}") - return True - else: - # gh-92256: don't allow incorrectly formatted stop lines - if line.lstrip().startswith(stop_line): - fail(f"Whitespace is not allowed before the stop line: {line!r}") - return False - - # consume body of program - while self.input: - line = self._line() - if is_stop_line(line) or self.is_start_line(line): - break - if body_prefix: - line = line.lstrip() - assert line.startswith(body_prefix) - line = line.removeprefix(body_prefix) - in_lines.append(line) - - # consume output and checksum line, if present. - if self.last_dsl_name == dsl_name: - checksum_re = self.last_checksum_re - else: - before, _, after = self.language.checksum_line.format(dsl_name=dsl_name, arguments='{arguments}').partition('{arguments}') - assert _ == '{arguments}' - checksum_re = libclinic.create_regex(before, after, word=False) - self.last_dsl_name = dsl_name - self.last_checksum_re = checksum_re - assert checksum_re is not None - - # scan forward for checksum line - out_lines = [] - arguments = None - while self.input: - line = self._line(lookahead=True) - match = checksum_re.match(line.lstrip()) - arguments = match.group(1) if match else None - if arguments: - break - out_lines.append(line) - if self.is_start_line(line): - break - - output: str | None - output = "".join(out_lines) - if arguments: - d = {} - for field in shlex.split(arguments): - name, equals, value = field.partition('=') - if not equals: - fail(f"Mangled Argument Clinic marker line: {line!r}") - d[name.strip()] = value.strip() - - if self.verify: - if 'input' in d: - checksum = d['output'] - else: - checksum = d['checksum'] - - computed = libclinic.compute_checksum(output, len(checksum)) - if checksum != computed: - fail("Checksum mismatch! " - f"Expected {checksum!r}, computed {computed!r}. " - "Suggested fix: remove all generated code including " - "the end marker, or use the '-f' option.") - else: - # put back output - output_lines = output.splitlines(keepends=True) - self.line_number -= len(output_lines) - self.input.extend(reversed(output_lines)) - output = None - - return Block("".join(in_lines), dsl_name, output=output) - - @dc.dataclass(slots=True, frozen=True) class Include: """ diff --git a/Tools/clinic/libclinic/block_parser.py b/Tools/clinic/libclinic/block_parser.py new file mode 100644 index 00000000000000..4c0198b53592a9 --- /dev/null +++ b/Tools/clinic/libclinic/block_parser.py @@ -0,0 +1,256 @@ +from __future__ import annotations +import collections +import dataclasses as dc +import re +import shlex +from typing import Any + +import libclinic +from libclinic import fail, ClinicError +from libclinic.language import Language +from libclinic.function import ( + Module, Class, Function) + + +@dc.dataclass(slots=True, repr=False) +class Block: + r""" + Represents a single block of text embedded in + another file. If dsl_name is None, the block represents + verbatim text, raw original text from the file, in + which case "input" will be the only non-false member. + If dsl_name is not None, the block represents a Clinic + block. + + input is always str, with embedded \n characters. + input represents the original text from the file; + if it's a Clinic block, it is the original text with + the body_prefix and redundant leading whitespace removed. + + dsl_name is either str or None. If str, it's the text + found on the start line of the block between the square + brackets. + + signatures is a list. + It may only contain clinic.Module, clinic.Class, and + clinic.Function objects. At the moment it should + contain at most one of each. + + output is either str or None. If str, it's the output + from this block, with embedded '\n' characters. + + indent is a str. It's the leading whitespace + that was found on every line of input. (If body_prefix is + not empty, this is the indent *after* removing the + body_prefix.) + + "indent" is different from the concept of "preindent" + (which is not stored as state on Block objects). + "preindent" is the whitespace that + was found in front of every line of input *before* the + "body_prefix" (see the Language object). If body_prefix + is empty, preindent must always be empty too. + + To illustrate the difference between "indent" and "preindent": + + Assume that '_' represents whitespace. + If the block processed was in a Python file, and looked like this: + ____#/*[python] + ____#__for a in range(20): + ____#____print(a) + ____#[python]*/ + "preindent" would be "____" and "indent" would be "__". + + """ + input: str + dsl_name: str | None = None + signatures: list[Module | Class | Function] = dc.field(default_factory=list) + output: Any = None # TODO: Very dynamic; probably untypeable in its current form? + indent: str = '' + + def __repr__(self) -> str: + dsl_name = self.dsl_name or "text" + def summarize(s: object) -> str: + s = repr(s) + if len(s) > 30: + return s[:26] + "..." + s[0] + return s + parts = ( + repr(dsl_name), + f"input={summarize(self.input)}", + f"output={summarize(self.output)}" + ) + return f"" + + +class BlockParser: + """ + Block-oriented parser for Argument Clinic. + Iterator, yields Block objects. + """ + + def __init__( + self, + input: str, + language: Language, + *, + verify: bool = True + ) -> None: + """ + "input" should be a str object + with embedded \n characters. + + "language" should be a Language object. + """ + language.validate() + + self.input = collections.deque(reversed(input.splitlines(keepends=True))) + self.block_start_line_number = self.line_number = 0 + + self.language = language + before, _, after = language.start_line.partition('{dsl_name}') + assert _ == '{dsl_name}' + self.find_start_re = libclinic.create_regex(before, after, + whole_line=False) + self.start_re = libclinic.create_regex(before, after) + self.verify = verify + self.last_checksum_re: re.Pattern[str] | None = None + self.last_dsl_name: str | None = None + self.dsl_name: str | None = None + self.first_block = True + + def __iter__(self) -> BlockParser: + return self + + def __next__(self) -> Block: + while True: + if not self.input: + raise StopIteration + + if self.dsl_name: + try: + return_value = self.parse_clinic_block(self.dsl_name) + except ClinicError as exc: + exc.filename = self.language.filename + exc.lineno = self.line_number + raise + self.dsl_name = None + self.first_block = False + return return_value + block = self.parse_verbatim_block() + if self.first_block and not block.input: + continue + self.first_block = False + return block + + + def is_start_line(self, line: str) -> str | None: + match = self.start_re.match(line.lstrip()) + return match.group(1) if match else None + + def _line(self, lookahead: bool = False) -> str: + self.line_number += 1 + line = self.input.pop() + if not lookahead: + self.language.parse_line(line) + return line + + def parse_verbatim_block(self) -> Block: + lines = [] + self.block_start_line_number = self.line_number + + while self.input: + line = self._line() + dsl_name = self.is_start_line(line) + if dsl_name: + self.dsl_name = dsl_name + break + lines.append(line) + + return Block("".join(lines)) + + def parse_clinic_block(self, dsl_name: str) -> Block: + in_lines = [] + self.block_start_line_number = self.line_number + 1 + stop_line = self.language.stop_line.format(dsl_name=dsl_name) + body_prefix = self.language.body_prefix.format(dsl_name=dsl_name) + + def is_stop_line(line: str) -> bool: + # make sure to recognize stop line even if it + # doesn't end with EOL (it could be the very end of the file) + if line.startswith(stop_line): + remainder = line.removeprefix(stop_line) + if remainder and not remainder.isspace(): + fail(f"Garbage after stop line: {remainder!r}") + return True + else: + # gh-92256: don't allow incorrectly formatted stop lines + if line.lstrip().startswith(stop_line): + fail(f"Whitespace is not allowed before the stop line: {line!r}") + return False + + # consume body of program + while self.input: + line = self._line() + if is_stop_line(line) or self.is_start_line(line): + break + if body_prefix: + line = line.lstrip() + assert line.startswith(body_prefix) + line = line.removeprefix(body_prefix) + in_lines.append(line) + + # consume output and checksum line, if present. + if self.last_dsl_name == dsl_name: + checksum_re = self.last_checksum_re + else: + before, _, after = self.language.checksum_line.format(dsl_name=dsl_name, arguments='{arguments}').partition('{arguments}') + assert _ == '{arguments}' + checksum_re = libclinic.create_regex(before, after, word=False) + self.last_dsl_name = dsl_name + self.last_checksum_re = checksum_re + assert checksum_re is not None + + # scan forward for checksum line + out_lines = [] + arguments = None + while self.input: + line = self._line(lookahead=True) + match = checksum_re.match(line.lstrip()) + arguments = match.group(1) if match else None + if arguments: + break + out_lines.append(line) + if self.is_start_line(line): + break + + output: str | None + output = "".join(out_lines) + if arguments: + d = {} + for field in shlex.split(arguments): + name, equals, value = field.partition('=') + if not equals: + fail(f"Mangled Argument Clinic marker line: {line!r}") + d[name.strip()] = value.strip() + + if self.verify: + if 'input' in d: + checksum = d['output'] + else: + checksum = d['checksum'] + + computed = libclinic.compute_checksum(output, len(checksum)) + if checksum != computed: + fail("Checksum mismatch! " + f"Expected {checksum!r}, computed {computed!r}. " + "Suggested fix: remove all generated code including " + "the end marker, or use the '-f' option.") + else: + # put back output + output_lines = output.splitlines(keepends=True) + self.line_number -= len(output_lines) + self.input.extend(reversed(output_lines)) + output = None + + return Block("".join(in_lines), dsl_name, output=output) diff --git a/Tools/clinic/libclinic/language.py b/Tools/clinic/libclinic/language.py new file mode 100644 index 00000000000000..a90a9bb24e2201 --- /dev/null +++ b/Tools/clinic/libclinic/language.py @@ -0,0 +1,103 @@ +from __future__ import annotations +import abc +import typing +from collections.abc import ( + Iterable, +) + +import libclinic +from libclinic import fail +from libclinic.function import ( + Module, Class, Function) + +if typing.TYPE_CHECKING: + from clinic import Clinic + + +class Language(metaclass=abc.ABCMeta): + + start_line = "" + body_prefix = "" + stop_line = "" + checksum_line = "" + + def __init__(self, filename: str) -> None: + self.filename = filename + + @abc.abstractmethod + def render( + self, + clinic: Clinic, + signatures: Iterable[Module | Class | Function] + ) -> str: + ... + + def parse_line(self, line: str) -> None: + ... + + def validate(self) -> None: + def assert_only_one( + attr: str, + *additional_fields: str + ) -> None: + """ + Ensures that the string found at getattr(self, attr) + contains exactly one formatter replacement string for + each valid field. The list of valid fields is + ['dsl_name'] extended by additional_fields. + + e.g. + self.fmt = "{dsl_name} {a} {b}" + + # this passes + self.assert_only_one('fmt', 'a', 'b') + + # this fails, the format string has a {b} in it + self.assert_only_one('fmt', 'a') + + # this fails, the format string doesn't have a {c} in it + self.assert_only_one('fmt', 'a', 'b', 'c') + + # this fails, the format string has two {a}s in it, + # it must contain exactly one + self.fmt2 = '{dsl_name} {a} {a}' + self.assert_only_one('fmt2', 'a') + + """ + fields = ['dsl_name'] + fields.extend(additional_fields) + line: str = getattr(self, attr) + fcf = libclinic.FormatCounterFormatter() + fcf.format(line) + def local_fail(should_be_there_but_isnt: bool) -> None: + if should_be_there_but_isnt: + fail("{} {} must contain {{{}}} exactly once!".format( + self.__class__.__name__, attr, name)) + else: + fail("{} {} must not contain {{{}}}!".format( + self.__class__.__name__, attr, name)) + + for name, count in fcf.counts.items(): + if name in fields: + if count > 1: + local_fail(True) + else: + local_fail(False) + for name in fields: + if fcf.counts.get(name) != 1: + local_fail(True) + + assert_only_one('start_line') + assert_only_one('stop_line') + + field = "arguments" if "{arguments}" in self.checksum_line else "checksum" + assert_only_one('checksum_line', field) + + +class PythonLanguage(Language): + + language = 'Python' + start_line = "#/*[{dsl_name} input]" + body_prefix = "#" + stop_line = "#[{dsl_name} start generated code]*/" + checksum_line = "#/*[{dsl_name} end generated code: {arguments}]*/" pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy