From 0cac4142bb41474294ea7f3346edd0bbbfec0481 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20K=2E=20Sepp=C3=A4nen?= Date: Thu, 30 Jul 2020 17:38:44 +0300 Subject: [PATCH 1/4] Proof of concept: Type42 subsetting in pdf --- lib/matplotlib/backends/backend_pdf.py | 47 +++++++++++++++++++------- lib/matplotlib/testing/conftest.py | 2 +- setup.py | 1 + 3 files changed, 37 insertions(+), 13 deletions(-) diff --git a/lib/matplotlib/backends/backend_pdf.py b/lib/matplotlib/backends/backend_pdf.py index e8e8ece1310c..f6875564ecad 100644 --- a/lib/matplotlib/backends/backend_pdf.py +++ b/lib/matplotlib/backends/backend_pdf.py @@ -15,11 +15,13 @@ import os import re import struct +import tempfile import time import types import warnings import zlib +from fontTools import subset import numpy as np from PIL import Image @@ -36,7 +38,7 @@ import matplotlib.type1font as type1font import matplotlib.dviread as dviread from matplotlib.ft2font import (FIXED_WIDTH, ITALIC, LOAD_NO_SCALE, - LOAD_NO_HINTING, KERNING_UNFITTED) + LOAD_NO_HINTING, KERNING_UNFITTED, FT2Font) from matplotlib.mathtext import MathTextParser from matplotlib.transforms import Affine2D, BboxBase from matplotlib.path import Path @@ -1146,6 +1148,17 @@ def embedTTFType42(font, characters, descriptor): wObject = self.reserveObject('Type 0 widths') toUnicodeMapObject = self.reserveObject('ToUnicode map') + print(f"SUBSET {filename} characters: {''.join(chr(c) for c in characters)}") + fontdata = self.getSubset(filename, ''.join(chr(c) for c in characters)) + print(f'SUBSET {filename} {os.stat(filename).st_size} -> {len(fontdata)}') + + # reload the font object from the subset + # (all the necessary data could probably be obtained directly using fontLib.ttLib) + with tempfile.NamedTemporaryFile(suffix='.ttf') as tmp: + tmp.write(fontdata) + tmp.seek(0,0) + font = FT2Font(tmp.name) + cidFontDict = { 'Type': Name('Font'), 'Subtype': Name('CIDFontType2'), @@ -1170,21 +1183,12 @@ def embedTTFType42(font, characters, descriptor): # Make fontfile stream descriptor['FontFile2'] = fontfileObject - length1Object = self.reserveObject('decoded length of a font') self.beginStream( fontfileObject.id, self.reserveObject('length of font stream'), - {'Length1': length1Object}) - with open(filename, 'rb') as fontfile: - length1 = 0 - while True: - data = fontfile.read(4096) - if not data: - break - length1 += len(data) - self.currentstream.write(data) + {'Length1': len(fontdata)}) + self.currentstream.write(fontdata) self.endStream() - self.writeObject(length1Object, length1) # Make the 'W' (Widths) array, CidToGidMap and ToUnicode CMap # at the same time @@ -1307,6 +1311,25 @@ def embedTTFType42(font, characters, descriptor): elif fonttype == 42: return embedTTFType42(font, characters, descriptor) + @classmethod + def getSubset(self, fontfile, characters): + """Read TTF font from the given file and subset it for the given characters. + + Returns a serialization of the subset font as bytes.""" + + options = subset.Options(glyph_names=True, recommended_glyphs=True) + options.drop_tables += ['FFTM'] + font = subset.load_font(fontfile, options) + try: + subsetter = subset.Subsetter(options=options) + subsetter.populate(text=characters) + subsetter.subset(font) + fh = BytesIO() + font.save(fh, reorderTables=False) + return fh.getvalue() + finally: + font.close() + def alphaState(self, alpha): """Return name of an ExtGState that sets alpha to the given value.""" diff --git a/lib/matplotlib/testing/conftest.py b/lib/matplotlib/testing/conftest.py index 391dd5d49d38..a328f2dbb9cc 100644 --- a/lib/matplotlib/testing/conftest.py +++ b/lib/matplotlib/testing/conftest.py @@ -16,7 +16,7 @@ def pytest_configure(config): ("markers", "style: Set alternate Matplotlib style temporarily."), ("markers", "baseline_images: Compare output against references."), ("markers", "pytz: Tests that require pytz to be installed."), - ("filterwarnings", "error"), + #("filterwarnings", "error"), # fontTools.subset raises a pointless DeprecationWarning ]: config.addinivalue_line(key, value) diff --git a/setup.py b/setup.py index 7f08fa09d6eb..b6556174d0e6 100644 --- a/setup.py +++ b/setup.py @@ -279,6 +279,7 @@ def build_extensions(self): ], install_requires=[ "cycler>=0.10", + "fonttools>=4.13.0,<5.0", "kiwisolver>=1.0.1", "numpy>=1.16", "pillow>=6.2.0", From 468c52c38898b22872229450393c831e93194597 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20K=2E=20Sepp=C3=A4nen?= Date: Sat, 1 Aug 2020 18:46:50 +0300 Subject: [PATCH 2/4] flake8 --- lib/matplotlib/backends/backend_pdf.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/lib/matplotlib/backends/backend_pdf.py b/lib/matplotlib/backends/backend_pdf.py index f6875564ecad..ee00d09404a2 100644 --- a/lib/matplotlib/backends/backend_pdf.py +++ b/lib/matplotlib/backends/backend_pdf.py @@ -1148,15 +1148,21 @@ def embedTTFType42(font, characters, descriptor): wObject = self.reserveObject('Type 0 widths') toUnicodeMapObject = self.reserveObject('ToUnicode map') - print(f"SUBSET {filename} characters: {''.join(chr(c) for c in characters)}") - fontdata = self.getSubset(filename, ''.join(chr(c) for c in characters)) - print(f'SUBSET {filename} {os.stat(filename).st_size} -> {len(fontdata)}') + print(f"SUBSET {filename} characters: " + f"{''.join(chr(c) for c in characters)}") + fontdata = self.getSubset( + filename, + ''.join(chr(c) for c in characters) + ) + print(f'SUBSET {filename} {os.stat(filename).st_size}' + f' ↦ {len(fontdata)}') # reload the font object from the subset - # (all the necessary data could probably be obtained directly using fontLib.ttLib) + # (all the necessary data could probably be obtained directly + # using fontLib.ttLib) with tempfile.NamedTemporaryFile(suffix='.ttf') as tmp: tmp.write(fontdata) - tmp.seek(0,0) + tmp.seek(0, 0) font = FT2Font(tmp.name) cidFontDict = { @@ -1313,9 +1319,11 @@ def embedTTFType42(font, characters, descriptor): @classmethod def getSubset(self, fontfile, characters): - """Read TTF font from the given file and subset it for the given characters. + """ + Read TTF font from the given file and subset it for the given characters. - Returns a serialization of the subset font as bytes.""" + Returns a serialization of the subset font as bytes. + """ options = subset.Options(glyph_names=True, recommended_glyphs=True) options.drop_tables += ['FFTM'] From 9e01acaa7afc8339ae06d0bd313b509aebec7a8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20K=2E=20Sepp=C3=A4nen?= Date: Sat, 1 Aug 2020 18:55:54 +0300 Subject: [PATCH 3/4] Filter out just the py23 warning --- lib/matplotlib/testing/conftest.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/matplotlib/testing/conftest.py b/lib/matplotlib/testing/conftest.py index a328f2dbb9cc..8b8e3a8a3e9a 100644 --- a/lib/matplotlib/testing/conftest.py +++ b/lib/matplotlib/testing/conftest.py @@ -16,7 +16,9 @@ def pytest_configure(config): ("markers", "style: Set alternate Matplotlib style temporarily."), ("markers", "baseline_images: Compare output against references."), ("markers", "pytz: Tests that require pytz to be installed."), - #("filterwarnings", "error"), # fontTools.subset raises a pointless DeprecationWarning + ("filterwarnings", "error"), + ("filterwarnings", + "ignore:.*The py23 module has been deprecated:DeprecationWarning"), ]: config.addinivalue_line(key, value) From 591f9a89954daf405cecdcdb72cb4eee6ddc0153 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20K=2E=20Sepp=C3=A4nen?= Date: Sat, 1 Aug 2020 19:06:34 +0300 Subject: [PATCH 4/4] More flake8 --- lib/matplotlib/backends/backend_pdf.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/matplotlib/backends/backend_pdf.py b/lib/matplotlib/backends/backend_pdf.py index ee00d09404a2..a3e30926a6f4 100644 --- a/lib/matplotlib/backends/backend_pdf.py +++ b/lib/matplotlib/backends/backend_pdf.py @@ -1320,8 +1320,9 @@ def embedTTFType42(font, characters, descriptor): @classmethod def getSubset(self, fontfile, characters): """ - Read TTF font from the given file and subset it for the given characters. + Subset a TTF font + Reads the named fontfile and restricts the font to the characters. Returns a serialization of the subset font as bytes. """ pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy