|
6 | 6 | import os
|
7 | 7 | import hashlib
|
8 | 8 | import json
|
| 9 | +import string |
9 | 10 | from collections import OrderedDict
|
10 | 11 | from typing import List, Tuple, Dict, Iterable, Set, TypeVar, Optional
|
11 | 12 |
|
|
67 | 68 | # A list of (file name, file contents) pairs.
|
68 | 69 | FileContents = List[Tuple[str, str]]
|
69 | 70 |
|
| 71 | +# The C standard specifies that an unlimited number of valid hexadecimal |
| 72 | +# characters are parsed as part of the hexadecimal escape sequence. For |
| 73 | +# example, "\x12345" would be unexpectedly parsed as {0x12345}, instead of |
| 74 | +# {0x12, '3', '4', '5'}. Therefore, we use octal escape sequences which are |
| 75 | +# specified to contain at most three octal digits. |
| 76 | +C_CHAR_MAP = ['\\{:03o}'.format(x) for x in range(256)] |
| 77 | +# Most printable characters do not need to be escaped in string literals. We |
| 78 | +# can safely use string.printable here because it always uses the C locale. |
| 79 | +for x in string.printable: |
| 80 | + C_CHAR_MAP[ord(x)] = x |
| 81 | +# These assignments must be done after string.printable because they are |
| 82 | +# overrides for the printable characters that need to be escaped in string |
| 83 | +# literals. |
| 84 | +C_CHAR_MAP[ord('\'')] = r'\'' |
| 85 | +C_CHAR_MAP[ord('\"')] = r'\"' |
| 86 | +C_CHAR_MAP[ord('\\')] = r'\\' |
| 87 | +C_CHAR_MAP[ord('\a')] = r'\a' |
| 88 | +C_CHAR_MAP[ord('\b')] = r'\b' |
| 89 | +C_CHAR_MAP[ord('\f')] = r'\f' |
| 90 | +C_CHAR_MAP[ord('\n')] = r'\n' |
| 91 | +C_CHAR_MAP[ord('\r')] = r'\r' |
| 92 | +C_CHAR_MAP[ord('\t')] = r'\t' |
| 93 | +C_CHAR_MAP[ord('\v')] = r'\v' |
| 94 | +# The question mark is escaped to prevent trigraphs from being interpreted |
| 95 | +# inside string literals. This escape sequence is invalid in Python. |
| 96 | +C_CHAR_MAP[ord('?')] = r'\?' |
| 97 | + |
70 | 98 |
|
71 | 99 | class MarkedDeclaration:
|
72 | 100 | """Add a mark, useful for topological sort."""
|
@@ -421,8 +449,7 @@ def encode_as_c_string(s: str) -> Tuple[str, int]:
|
421 | 449 |
|
422 | 450 | def encode_bytes_as_c_string(b: bytes) -> Tuple[str, int]:
|
423 | 451 | """Produce a single-escaped, quoted C string and its size from a bytes"""
|
424 |
| - # This is a kind of abusive way to do this... |
425 |
| - escaped = repr(b)[2:-1].replace('"', '\\"') |
| 452 | + escaped = ''.join(map(C_CHAR_MAP.__getitem__, b)) |
426 | 453 | return '"{}"'.format(escaped), len(b)
|
427 | 454 |
|
428 | 455 |
|
|
0 commit comments