Decrypt the eexec-encrypted part of Type 1 fonts and expose it as
Type1Font.decrypted.  Add the _decrypt/_encrypt helpers, which accept either
a named key ('eexec', 'charstring') or an integer key.

Note: the "index" lines keep the blob ids recorded with the original patch.

diff --git a/doc/api/next_api_changes/behavior/20634-JKS.rst b/doc/api/next_api_changes/behavior/20634-JKS.rst
new file mode 100644
index 000000000000..ff4046445e42
--- /dev/null
+++ b/doc/api/next_api_changes/behavior/20634-JKS.rst
@@ -0,0 +1,8 @@
+``Type1Font`` objects now decrypt the encrypted part
+----------------------------------------------------
+
+Type 1 fonts have a large part of their code encrypted as an obsolete
+copy-protection measure. This part is now available decrypted as the
+``decrypted`` attribute of :class:`~matplotlib.type1font.Type1Font`.
+This decrypted data is not yet parsed, but this is a prerequisite for
+implementing subsetting.
diff --git a/lib/matplotlib/tests/test_type1font.py b/lib/matplotlib/tests/test_type1font.py
index 5766709c6cf8..99cc3e500b0e 100644
--- a/lib/matplotlib/tests/test_type1font.py
+++ b/lib/matplotlib/tests/test_type1font.py
@@ -15,6 +15,8 @@ def test_Type1Font():
     assert font.parts[2] == rawdata[0x8985:0x8ba6]
     assert font.parts[1:] == slanted.parts[1:]
     assert font.parts[1:] == condensed.parts[1:]
+    assert font.decrypted.startswith(b'dup\n/Private 18 dict dup begin')
+    assert font.decrypted.endswith(b'mark currentfile closefile\n')
 
     differ = difflib.Differ()
     diff = list(differ.compare(
@@ -67,3 +69,13 @@ def test_overprecision():
     assert matrix == '0.001 0 0.000167 0.001 0 0'
     # and here we had -9.48090361795083
     assert angle == '-9.4809'
+
+
+def test_encrypt_decrypt_roundtrip():
+    data = b'this is my plaintext \0\1\2\3'
+    encrypted = t1f.Type1Font._encrypt(data, 'eexec')
+    decrypted = t1f.Type1Font._decrypt(encrypted, 'eexec')
+    assert encrypted != decrypted
+    assert data == decrypted
+    # An integer key is used as given, so 55665 must behave like 'eexec'.
+    assert t1f.Type1Font._decrypt(encrypted, 55665) == data
diff --git a/lib/matplotlib/type1font.py b/lib/matplotlib/type1font.py
index a9ae51ea5303..f417c0fc97a4 100644
--- a/lib/matplotlib/type1font.py
+++ b/lib/matplotlib/type1font.py
@@ -24,13 +24,16 @@
 import binascii
 import enum
 import itertools
+import logging
 import re
 import struct
 
 import numpy as np
 
 from matplotlib.cbook import _format_approx
+from . import _api
 
+_log = logging.getLogger(__name__)
 
 # token types
 _TokenType = enum.Enum('_TokenType',
@@ -46,10 +49,12 @@ class Type1Font:
     parts : tuple
         A 3-tuple of the cleartext part, the encrypted part, and the finale of
         zeros.
+    decrypted : bytes
+        The decrypted form of parts[1].
     prop : dict[str, Any]
         A dictionary of font properties.
     """
-    __slots__ = ('parts', 'prop')
+    __slots__ = ('parts', 'decrypted', 'prop')
 
     def __init__(self, input):
         """
@@ -68,6 +73,7 @@ def __init__(self, input):
                 data = self._read(file)
             self.parts = self._split(data)
 
+        self.decrypted = self._decrypt(self.parts[1], 'eexec')
         self._parse()
 
     def _read(self, file):
@@ -125,13 +131,16 @@ def _split(self, data):
                 zeros -= 1
             idx -= 1
         if zeros:
-            raise RuntimeError('Insufficiently many zeros in Type 1 font')
+            # this may have been a problem on old implementations that
+            # used the zeros as necessary padding
+            _log.info('Insufficiently many zeros in Type 1 font')
 
         # Convert encrypted part to binary (if we read a pfb file, we may end
         # up converting binary to hexadecimal to binary again; but if we read
         # a pfa file, this part is already in hex, and I am not quite sure if
         # even the pfb format guarantees that it will be in binary).
-        binary = binascii.unhexlify(data[len1:idx+1])
+        idx1 = len1 + ((idx - len1 + 2) & ~1)  # ensure an even number of bytes
+        binary = binascii.unhexlify(data[len1:idx1])
 
         return data[:len1], binary, data[idx+1:]
 
@@ -139,6 +148,60 @@ def _split(self, data):
     _token_re = re.compile(br'/{0,2}[^]\0\t\r\v\n ()<>{}/%[]+')
     _instring_re = re.compile(br'[()\\]')
 
+    @staticmethod
+    def _decrypt(ciphertext, key, ndiscard=4):
+        """
+        Decrypt ciphertext using the Type-1 font algorithm.
+
+        The algorithm is described in Adobe's "Adobe Type 1 Font Format".
+        The key argument can be a 16-bit integer, or one of the strings
+        'eexec' and 'charstring', which map to the key specified for the
+        corresponding part of Type-1 fonts.
+
+        The ndiscard argument should be an integer, usually 4.
+        That number of bytes is discarded from the beginning of plaintext.
+        """
+
+        # A name selects the standard key; an integer is used as given.
+        if not isinstance(key, int):
+            key = _api.check_getitem(
+                {'eexec': 55665, 'charstring': 4330}, key=key)
+        plaintext = []
+        for byte in ciphertext:
+            plaintext.append(byte ^ (key >> 8))
+            key = ((key+byte) * 52845 + 22719) & 0xffff
+
+        return bytes(plaintext[ndiscard:])
+
+    @staticmethod
+    def _encrypt(plaintext, key, ndiscard=4):
+        """
+        Encrypt plaintext using the Type-1 font algorithm.
+
+        The algorithm is described in Adobe's "Adobe Type 1 Font Format".
+        The key argument can be a 16-bit integer, or one of the strings
+        'eexec' and 'charstring', which map to the key specified for the
+        corresponding part of Type-1 fonts.
+
+        The ndiscard argument should be an integer, usually 4. That
+        number of bytes is prepended to the plaintext before encryption.
+        This function prepends NUL bytes for reproducibility, even though
+        the original algorithm uses random bytes, presumably to avoid
+        cryptanalysis.
+        """
+
+        # A name selects the standard key; an integer is used as given.
+        if not isinstance(key, int):
+            key = _api.check_getitem(
+                {'eexec': 55665, 'charstring': 4330}, key=key)
+        ciphertext = []
+        for byte in b'\0' * ndiscard + plaintext:
+            c = byte ^ (key >> 8)
+            ciphertext.append(c)
+            key = ((key + c) * 52845 + 22719) & 0xffff
+
+        return bytes(ciphertext)
+
     @classmethod
     def _tokens(cls, text):
         """
Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.
Alternative Proxies: