Skip to content

Commit bd127f3

Browse files
authored
Merge pull request #20633 from sauerburger/pdf-non-bmp-chars
Emit non BMP chars as XObjects in PDF
2 parents 1a8caa5 + a4067a0 commit bd127f3

File tree

3 files changed

+67
-22
lines changed

3 files changed

+67
-22
lines changed

lib/matplotlib/backends/backend_pdf.py

Lines changed: 59 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -321,6 +321,21 @@ def pdfRepr(obj):
321321
.format(type(obj)))
322322

323323

324+
def _font_supports_char(fonttype, char):
325+
"""
326+
Returns True if the font is able to provide *char* in a PDF.
327+
328+
For a Type 3 font, this method returns True only for single-byte
329+
chars. For Type 42 fonts this method returns True if the char is from
330+
the Basic Multilingual Plane.
331+
"""
332+
if fonttype == 3:
333+
return ord(char) <= 255
334+
if fonttype == 42:
335+
return ord(char) <= 65535
336+
raise NotImplementedError()
337+
338+
324339
class Reference:
325340
"""
326341
PDF reference object.
@@ -1268,13 +1283,48 @@ def embedTTFType42(font, characters, descriptor):
12681283

12691284
unicode_bfrange = []
12701285
for start, end in unicode_groups:
1286+
# Ensure the CID map contains only chars from BMP
1287+
if start > 65535:
1288+
continue
1289+
end = min(65535, end)
1290+
12711291
unicode_bfrange.append(
12721292
b"<%04x> <%04x> [%s]" %
12731293
(start, end,
12741294
b" ".join(b"<%04x>" % x for x in range(start, end+1))))
12751295
unicode_cmap = (self._identityToUnicodeCMap %
12761296
(len(unicode_groups), b"\n".join(unicode_bfrange)))
12771297

1298+
# Add XObjects for unsupported chars
1299+
glyph_ids = []
1300+
for ccode in characters:
1301+
if not _font_supports_char(fonttype, chr(ccode)):
1302+
gind = font.get_char_index(ccode)
1303+
glyph_ids.append(gind)
1304+
1305+
bbox = [cvt(x, nearest=False) for x in font.bbox]
1306+
rawcharprocs = _get_pdf_charprocs(filename, glyph_ids)
1307+
for charname in sorted(rawcharprocs):
1308+
stream = rawcharprocs[charname]
1309+
charprocDict = {'Length': len(stream)}
1310+
charprocDict['Type'] = Name('XObject')
1311+
charprocDict['Subtype'] = Name('Form')
1312+
charprocDict['BBox'] = bbox
1313+
# Each glyph includes bounding box information,
1314+
# but xpdf and ghostscript can't handle it in a
1315+
# Form XObject (they segfault!!!), so we remove it
1316+
# from the stream here. It's not needed anyway,
1317+
# since the Form XObject includes it in its BBox
1318+
# value.
1319+
stream = stream[stream.find(b"d1") + 2:]
1320+
charprocObject = self.reserveObject('charProc')
1321+
self.beginStream(charprocObject.id, None, charprocDict)
1322+
self.currentstream.write(stream)
1323+
self.endStream()
1324+
1325+
name = self._get_xobject_symbol_name(filename, charname)
1326+
self.multi_byte_charprocs[name] = charprocObject
1327+
12781328
# CIDToGIDMap stream
12791329
cid_to_gid_map = "".join(cid_to_gid_map).encode("utf-16be")
12801330
self.beginStream(cidToGidMapObject.id,
@@ -2106,16 +2156,17 @@ def draw_mathtext(self, gc, x, y, s, prop, angle):
21062156
self.check_gc(gc, gc._rgb)
21072157
prev_font = None, None
21082158
oldx, oldy = 0, 0
2109-
type3_multibytes = []
2159+
unsupported_chars = []
21102160

21112161
self.file.output(Op.begin_text)
21122162
for font, fontsize, num, ox, oy in glyphs:
2113-
self.file._character_tracker.track(font, chr(num))
2163+
char = chr(num)
2164+
self.file._character_tracker.track(font, char)
21142165
fontname = font.fname
2115-
if fonttype == 3 and num > 255:
2116-
# For Type3 fonts, multibyte characters must be emitted
2117-
# separately (below).
2118-
type3_multibytes.append((font, fontsize, ox, oy, num))
2166+
if not _font_supports_char(fonttype, char):
2167+
# Unsupported chars (i.e. multibyte in Type 3 or beyond BMP in
2168+
# Type 42) must be emitted separately (below).
2169+
unsupported_chars.append((font, fontsize, ox, oy, num))
21192170
else:
21202171
self._setup_textpos(ox, oy, 0, oldx, oldy)
21212172
oldx, oldy = ox, oy
@@ -2127,7 +2178,7 @@ def draw_mathtext(self, gc, x, y, s, prop, angle):
21272178
Op.show)
21282179
self.file.output(Op.end_text)
21292180

2130-
for font, fontsize, ox, oy, num in type3_multibytes:
2181+
for font, fontsize, ox, oy, num in unsupported_chars:
21312182
self._draw_xobject_glyph(
21322183
font, fontsize, font.get_char_index(num), ox, oy)
21332184

@@ -2236,20 +2287,6 @@ def encode_string(self, s, fonttype):
22362287
return s.encode('cp1252', 'replace')
22372288
return s.encode('utf-16be', 'replace')
22382289

2239-
@staticmethod
2240-
def _font_supports_char(fonttype, char):
2241-
"""
2242-
Returns True if the font is able to provide the char in a PDF
2243-
2244-
For a Type 3 font, this method returns True only for single-byte
2245-
chars. For Type 42 fonts this method always returns True.
2246-
"""
2247-
if fonttype == 3:
2248-
return ord(char) <= 255
2249-
if fonttype == 42:
2250-
return True
2251-
raise NotImplementedError()
2252-
22532290
def draw_text(self, gc, x, y, s, prop, angle, ismath=False, mtext=None):
22542291
# docstring inherited
22552292

@@ -2313,7 +2350,7 @@ def draw_text(self, gc, x, y, s, prop, angle, ismath=False, mtext=None):
23132350
prev_was_multibyte = True
23142351
for item in _text_helpers.layout(
23152352
s, font, kern_mode=KERNING_UNFITTED):
2316-
if self._font_supports_char(fonttype, item.char):
2353+
if _font_supports_char(fonttype, item.char):
23172354
if prev_was_multibyte:
23182355
singlebyte_chunks.append((item.x, []))
23192356
if item.prev_kern:
Binary file not shown.

lib/matplotlib/tests/test_text.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -748,3 +748,11 @@ def test_pdf_font42_kerning():
748748
plt.rcParams['pdf.fonttype'] = 42
749749
plt.figure()
750750
plt.figtext(0.1, 0.5, "ATAVATAVATAVATAVATA", size=30)
751+
752+
753+
@image_comparison(['text_pdf_chars_beyond_bmp.pdf'], style='mpl20')
754+
def test_pdf_chars_beyond_bmp():
755+
plt.rcParams['pdf.fonttype'] = 42
756+
plt.rcParams['mathtext.fontset'] = 'stixsans'
757+
plt.figure()
758+
plt.figtext(0.1, 0.5, "Mass $m$ \U00010308", size=30)

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy