diff --git a/mypyc/build.py b/mypyc/build.py index f3ef88302239..20efdce2d37b 100644 --- a/mypyc/build.py +++ b/mypyc/build.py @@ -507,7 +507,7 @@ def mypycify( if compiler.compiler_type == 'unix': cflags += [ '-O{}'.format(opt_level), '-Werror', '-Wno-unused-function', '-Wno-unused-label', - '-Wno-unreachable-code', '-Wno-unused-variable', '-Wno-trigraphs', + '-Wno-unreachable-code', '-Wno-unused-variable', '-Wno-unused-command-line-argument', '-Wno-unknown-warning-option', ] if 'gcc' in compiler.compiler[0]: diff --git a/mypyc/cstring.py b/mypyc/cstring.py new file mode 100644 index 000000000000..4fdb279258bd --- /dev/null +++ b/mypyc/cstring.py @@ -0,0 +1,49 @@ +"""Encode valid C string literals from Python strings. + +If a character is not allowed in C string literals, it is either emitted +as a simple escape sequence (e.g. '\\n'), or an octal escape sequence +with exactly three digits ('\\XXX'). Question marks are escaped to +prevent trigraphs in the string literal from being interpreted. Note +that '\\?' is an invalid escape sequence in Python. + +Consider the string literal "AB\\xCDEF". As one would expect, Python +parses it as ['A', 'B', 0xCD, 'E', 'F']. However, the C standard +specifies that all hexadecimal digits immediately following '\\x' will +be interpreted as part of the escape sequence. Therefore, it is +unexpectedly parsed as ['A', 'B', 0xCDEF]. + +Emitting ("AB\\xCD" "EF") would avoid this behaviour. However, we opt +for simplicity and use octal escape sequences instead. They do not +suffer from the same issue as they are defined to parse at most three +octal digits. +""" + +import string +from typing import Tuple + +CHAR_MAP = ['\\{:03o}'.format(i) for i in range(256)] + +# It is safe to use string.printable as it always uses the C locale. +for c in string.printable: + CHAR_MAP[ord(c)] = c + +# These assignments must come last because we prioritize simple escape +# sequences over any other representation. 
+for c in ('\'', '"', '\\', 'a', 'b', 'f', 'n', 'r', 't', 'v'): + escaped = '\\{}'.format(c) + decoded = escaped.encode('ascii').decode('unicode_escape') + CHAR_MAP[ord(decoded)] = escaped + +# This escape sequence is invalid in Python. +CHAR_MAP[ord('?')] = r'\?' + + +def encode_as_c_string(s: str) -> Tuple[str, int]: + """Produce a quoted C string literal and its size, for a UTF-8 string.""" + return encode_bytes_as_c_string(s.encode('utf-8')) + + +def encode_bytes_as_c_string(b: bytes) -> Tuple[str, int]: + """Produce a quoted C string literal and its size, for a byte string.""" + escaped = ''.join([CHAR_MAP[i] for i in b]) + return '"{}"'.format(escaped), len(b) diff --git a/mypyc/emitmodule.py b/mypyc/emitmodule.py index f1fdf0b5f833..efb7bffa262e 100644 --- a/mypyc/emitmodule.py +++ b/mypyc/emitmodule.py @@ -23,6 +23,7 @@ from mypyc.common import ( PREFIX, TOP_LEVEL_NAME, INT_PREFIX, MODULE_PREFIX, shared_lib_name, ) +from mypyc.cstring import encode_as_c_string, encode_bytes_as_c_string from mypyc.emit import EmitterContext, Emitter, HeaderDeclaration from mypyc.emitfunc import generate_native_function, native_function_header from mypyc.emitclass import generate_class_type_decl, generate_class @@ -414,18 +415,6 @@ def generate_function_declaration(fn: FuncIR, emitter: Emitter) -> None: '{};'.format(wrapper_function_header(fn, emitter.names))) -def encode_as_c_string(s: str) -> Tuple[str, int]: - """Produce a utf-8 encoded, escaped, quoted C string and its size from a string""" - return encode_bytes_as_c_string(s.encode('utf-8')) - - -def encode_bytes_as_c_string(b: bytes) -> Tuple[str, int]: - """Produce a single-escaped, quoted C string and its size from a bytes""" - # This is a kind of abusive way to do this... 
- escaped = repr(b)[2:-1].replace('"', '\\"') - return '"{}"'.format(escaped), len(b) - - def pointerize(decl: str, name: str) -> str: """Given a C decl and its name, modify it to be a declaration to a pointer.""" # This doesn't work in general but does work for all our types...
Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.
Alternative Proxies: