Skip to content

Commit 9fc720e

Browse files
davidkhess authored and zooba committed
bpo-4963: Fix for initialization and non-deterministic behavior issues in mimetypes (GH-3062)
1 parent 8bd2872 commit 9fc720e

File tree

4 files changed

+188
-120
lines changed

4 files changed

+188
-120
lines changed

Doc/library/mimetypes.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,10 @@ behavior of the module.
9393
Specifying an empty list for *files* will prevent the system defaults from
9494
being applied: only the well-known values will be present from a built-in list.
9595

96+
If *files* is ``None``, the internal data structure is completely rebuilt to its
97+
initial default value. This is a stable operation and will produce the same results
98+
when called multiple times.
99+
96100
.. versionchanged:: 3.2
97101
Previously, Windows registry settings were ignored.
98102

Lib/mimetypes.py

Lines changed: 131 additions & 120 deletions
Original file line numberDiff line numberDiff line change
@@ -66,13 +66,13 @@ class MimeTypes:
6666
def __init__(self, filenames=(), strict=True):
6767
if not inited:
6868
init()
69-
self.encodings_map = encodings_map.copy()
70-
self.suffix_map = suffix_map.copy()
69+
self.encodings_map = _encodings_map_default.copy()
70+
self.suffix_map = _suffix_map_default.copy()
7171
self.types_map = ({}, {}) # dict for (non-strict, strict)
7272
self.types_map_inv = ({}, {})
73-
for (ext, type) in types_map.items():
73+
for (ext, type) in _types_map_default.items():
7474
self.add_type(type, ext, True)
75-
for (ext, type) in common_types.items():
75+
for (ext, type) in _common_types_default.items():
7676
self.add_type(type, ext, False)
7777
for name in filenames:
7878
self.read(name, strict)
@@ -346,11 +346,19 @@ def init(files=None):
346346
global suffix_map, types_map, encodings_map, common_types
347347
global inited, _db
348348
inited = True # so that MimeTypes.__init__() doesn't call us again
349-
db = MimeTypes()
350-
if files is None:
349+
350+
if files is None or _db is None:
351+
db = MimeTypes()
351352
if _winreg:
352353
db.read_windows_registry()
353-
files = knownfiles
354+
355+
if files is None:
356+
files = knownfiles
357+
else:
358+
files = knownfiles + list(files)
359+
else:
360+
db = _db
361+
354362
for file in files:
355363
if os.path.isfile(file):
356364
db.read(file)
@@ -374,12 +382,12 @@ def read_mime_types(file):
374382

375383

376384
def _default_mime_types():
377-
global suffix_map
378-
global encodings_map
379-
global types_map
380-
global common_types
385+
global suffix_map, _suffix_map_default
386+
global encodings_map, _encodings_map_default
387+
global types_map, _types_map_default
388+
global common_types, _common_types_default
381389

382-
suffix_map = {
390+
suffix_map = _suffix_map_default = {
383391
'.svgz': '.svg.gz',
384392
'.tgz': '.tar.gz',
385393
'.taz': '.tar.gz',
@@ -388,7 +396,7 @@ def _default_mime_types():
388396
'.txz': '.tar.xz',
389397
}
390398

391-
encodings_map = {
399+
encodings_map = _encodings_map_default = {
392400
'.gz': 'gzip',
393401
'.Z': 'compress',
394402
'.bz2': 'bzip2',
@@ -399,152 +407,155 @@ def _default_mime_types():
399407
# at http://www.iana.org/assignments/media-types
400408
# or extensions, i.e. using the x- prefix
401409

402-
# If you add to these, please keep them sorted!
403-
types_map = {
410+
# If you add to these, please keep them sorted by mime type.
411+
# Make sure the entry with the preferred file extension for a particular mime type
412+
# appears before any others of the same mime type.
413+
types_map = _types_map_default = {
414+
'.js' : 'application/javascript',
415+
'.mjs' : 'application/javascript',
416+
'.json' : 'application/json',
417+
'.doc' : 'application/msword',
418+
'.dot' : 'application/msword',
419+
'.wiz' : 'application/msword',
420+
'.bin' : 'application/octet-stream',
404421
'.a' : 'application/octet-stream',
422+
'.dll' : 'application/octet-stream',
423+
'.exe' : 'application/octet-stream',
424+
'.o' : 'application/octet-stream',
425+
'.obj' : 'application/octet-stream',
426+
'.so' : 'application/octet-stream',
427+
'.oda' : 'application/oda',
428+
'.pdf' : 'application/pdf',
429+
'.p7c' : 'application/pkcs7-mime',
430+
'.ps' : 'application/postscript',
405431
'.ai' : 'application/postscript',
406-
'.aif' : 'audio/x-aiff',
407-
'.aifc' : 'audio/x-aiff',
408-
'.aiff' : 'audio/x-aiff',
409-
'.au' : 'audio/basic',
410-
'.avi' : 'video/x-msvideo',
411-
'.bat' : 'text/plain',
432+
'.eps' : 'application/postscript',
433+
'.m3u' : 'application/vnd.apple.mpegurl',
434+
'.m3u8' : 'application/vnd.apple.mpegurl',
435+
'.xls' : 'application/vnd.ms-excel',
436+
'.xlb' : 'application/vnd.ms-excel',
437+
'.ppt' : 'application/vnd.ms-powerpoint',
438+
'.pot' : 'application/vnd.ms-powerpoint',
439+
'.ppa' : 'application/vnd.ms-powerpoint',
440+
'.pps' : 'application/vnd.ms-powerpoint',
441+
'.pwz' : 'application/vnd.ms-powerpoint',
442+
'.wasm' : 'application/wasm',
412443
'.bcpio' : 'application/x-bcpio',
413-
'.bin' : 'application/octet-stream',
414-
'.bmp' : 'image/bmp',
415-
'.c' : 'text/plain',
416-
'.cdf' : 'application/x-netcdf',
417444
'.cpio' : 'application/x-cpio',
418445
'.csh' : 'application/x-csh',
419-
'.css' : 'text/css',
420-
'.csv' : 'text/csv',
421-
'.dll' : 'application/octet-stream',
422-
'.doc' : 'application/msword',
423-
'.dot' : 'application/msword',
424446
'.dvi' : 'application/x-dvi',
425-
'.eml' : 'message/rfc822',
426-
'.eps' : 'application/postscript',
427-
'.etx' : 'text/x-setext',
428-
'.exe' : 'application/octet-stream',
429-
'.gif' : 'image/gif',
430447
'.gtar' : 'application/x-gtar',
431-
'.h' : 'text/plain',
432448
'.hdf' : 'application/x-hdf',
433-
'.htm' : 'text/html',
434-
'.html' : 'text/html',
435-
'.ico' : 'image/vnd.microsoft.icon',
436-
'.ief' : 'image/ief',
437-
'.jpe' : 'image/jpeg',
438-
'.jpeg' : 'image/jpeg',
439-
'.jpg' : 'image/jpeg',
440-
'.js' : 'application/javascript',
441-
'.json' : 'application/json',
442-
'.ksh' : 'text/plain',
443449
'.latex' : 'application/x-latex',
444-
'.m1v' : 'video/mpeg',
445-
'.m3u' : 'application/vnd.apple.mpegurl',
446-
'.m3u8' : 'application/vnd.apple.mpegurl',
447-
'.man' : 'application/x-troff-man',
448-
'.me' : 'application/x-troff-me',
449-
'.mht' : 'message/rfc822',
450-
'.mhtml' : 'message/rfc822',
451450
'.mif' : 'application/x-mif',
452-
'.mjs' : 'application/javascript',
453-
'.mov' : 'video/quicktime',
454-
'.movie' : 'video/x-sgi-movie',
455-
'.mp2' : 'audio/mpeg',
456-
'.mp3' : 'audio/mpeg',
457-
'.mp4' : 'video/mp4',
458-
'.mpa' : 'video/mpeg',
459-
'.mpe' : 'video/mpeg',
460-
'.mpeg' : 'video/mpeg',
461-
'.mpg' : 'video/mpeg',
462-
'.ms' : 'application/x-troff-ms',
451+
'.cdf' : 'application/x-netcdf',
463452
'.nc' : 'application/x-netcdf',
464-
'.nws' : 'message/rfc822',
465-
'.o' : 'application/octet-stream',
466-
'.obj' : 'application/octet-stream',
467-
'.oda' : 'application/oda',
468453
'.p12' : 'application/x-pkcs12',
469-
'.p7c' : 'application/pkcs7-mime',
470-
'.pbm' : 'image/x-portable-bitmap',
471-
'.pdf' : 'application/pdf',
472454
'.pfx' : 'application/x-pkcs12',
473-
'.pgm' : 'image/x-portable-graymap',
474-
'.pl' : 'text/plain',
475-
'.png' : 'image/png',
476-
'.pnm' : 'image/x-portable-anymap',
477-
'.pot' : 'application/vnd.ms-powerpoint',
478-
'.ppa' : 'application/vnd.ms-powerpoint',
479-
'.ppm' : 'image/x-portable-pixmap',
480-
'.pps' : 'application/vnd.ms-powerpoint',
481-
'.ppt' : 'application/vnd.ms-powerpoint',
482-
'.ps' : 'application/postscript',
483-
'.pwz' : 'application/vnd.ms-powerpoint',
484-
'.py' : 'text/x-python',
455+
'.ram' : 'application/x-pn-realaudio',
485456
'.pyc' : 'application/x-python-code',
486457
'.pyo' : 'application/x-python-code',
487-
'.qt' : 'video/quicktime',
488-
'.ra' : 'audio/x-pn-realaudio',
489-
'.ram' : 'application/x-pn-realaudio',
490-
'.ras' : 'image/x-cmu-raster',
491-
'.rdf' : 'application/xml',
492-
'.rgb' : 'image/x-rgb',
493-
'.roff' : 'application/x-troff',
494-
'.rtx' : 'text/richtext',
495-
'.sgm' : 'text/x-sgml',
496-
'.sgml' : 'text/x-sgml',
497458
'.sh' : 'application/x-sh',
498459
'.shar' : 'application/x-shar',
499-
'.snd' : 'audio/basic',
500-
'.so' : 'application/octet-stream',
501-
'.src' : 'application/x-wais-source',
460+
'.swf' : 'application/x-shockwave-flash',
502461
'.sv4cpio': 'application/x-sv4cpio',
503462
'.sv4crc' : 'application/x-sv4crc',
504-
'.svg' : 'image/svg+xml',
505-
'.swf' : 'application/x-shockwave-flash',
506-
'.t' : 'application/x-troff',
507463
'.tar' : 'application/x-tar',
508464
'.tcl' : 'application/x-tcl',
509465
'.tex' : 'application/x-tex',
510466
'.texi' : 'application/x-texinfo',
511467
'.texinfo': 'application/x-texinfo',
512-
'.tif' : 'image/tiff',
513-
'.tiff' : 'image/tiff',
468+
'.roff' : 'application/x-troff',
469+
'.t' : 'application/x-troff',
514470
'.tr' : 'application/x-troff',
515-
'.tsv' : 'text/tab-separated-values',
516-
'.txt' : 'text/plain',
471+
'.man' : 'application/x-troff-man',
472+
'.me' : 'application/x-troff-me',
473+
'.ms' : 'application/x-troff-ms',
517474
'.ustar' : 'application/x-ustar',
518-
'.vcf' : 'text/x-vcard',
519-
'.wasm' : 'application/wasm',
520-
'.wav' : 'audio/x-wav',
521-
'.webm' : 'video/webm',
522-
'.wiz' : 'application/msword',
475+
'.src' : 'application/x-wais-source',
476+
'.xsl' : 'application/xml',
477+
'.rdf' : 'application/xml',
523478
'.wsdl' : 'application/xml',
524-
'.xbm' : 'image/x-xbitmap',
525-
'.xlb' : 'application/vnd.ms-excel',
526-
'.xls' : 'application/vnd.ms-excel',
527-
'.xml' : 'text/xml',
528479
'.xpdl' : 'application/xml',
480+
'.zip' : 'application/zip',
481+
'.au' : 'audio/basic',
482+
'.snd' : 'audio/basic',
483+
'.mp3' : 'audio/mpeg',
484+
'.mp2' : 'audio/mpeg',
485+
'.aif' : 'audio/x-aiff',
486+
'.aifc' : 'audio/x-aiff',
487+
'.aiff' : 'audio/x-aiff',
488+
'.ra' : 'audio/x-pn-realaudio',
489+
'.wav' : 'audio/x-wav',
490+
'.bmp' : 'image/bmp',
491+
'.gif' : 'image/gif',
492+
'.ief' : 'image/ief',
493+
'.jpg' : 'image/jpeg',
494+
'.jpe' : 'image/jpeg',
495+
'.jpeg' : 'image/jpeg',
496+
'.png' : 'image/png',
497+
'.svg' : 'image/svg+xml',
498+
'.tiff' : 'image/tiff',
499+
'.tif' : 'image/tiff',
500+
'.ico' : 'image/vnd.microsoft.icon',
501+
'.ras' : 'image/x-cmu-raster',
502+
'.bmp' : 'image/x-ms-bmp',
503+
'.pnm' : 'image/x-portable-anymap',
504+
'.pbm' : 'image/x-portable-bitmap',
505+
'.pgm' : 'image/x-portable-graymap',
506+
'.ppm' : 'image/x-portable-pixmap',
507+
'.rgb' : 'image/x-rgb',
508+
'.xbm' : 'image/x-xbitmap',
529509
'.xpm' : 'image/x-xpixmap',
530-
'.xsl' : 'application/xml',
531510
'.xwd' : 'image/x-xwindowdump',
532-
'.zip' : 'application/zip',
511+
'.eml' : 'message/rfc822',
512+
'.mht' : 'message/rfc822',
513+
'.mhtml' : 'message/rfc822',
514+
'.nws' : 'message/rfc822',
515+
'.css' : 'text/css',
516+
'.csv' : 'text/csv',
517+
'.html' : 'text/html',
518+
'.htm' : 'text/html',
519+
'.txt' : 'text/plain',
520+
'.bat' : 'text/plain',
521+
'.c' : 'text/plain',
522+
'.h' : 'text/plain',
523+
'.ksh' : 'text/plain',
524+
'.pl' : 'text/plain',
525+
'.rtx' : 'text/richtext',
526+
'.tsv' : 'text/tab-separated-values',
527+
'.py' : 'text/x-python',
528+
'.etx' : 'text/x-setext',
529+
'.sgm' : 'text/x-sgml',
530+
'.sgml' : 'text/x-sgml',
531+
'.vcf' : 'text/x-vcard',
532+
'.xml' : 'text/xml',
533+
'.mp4' : 'video/mp4',
534+
'.mpeg' : 'video/mpeg',
535+
'.m1v' : 'video/mpeg',
536+
'.mpa' : 'video/mpeg',
537+
'.mpe' : 'video/mpeg',
538+
'.mpg' : 'video/mpeg',
539+
'.mov' : 'video/quicktime',
540+
'.qt' : 'video/quicktime',
541+
'.webm' : 'video/webm',
542+
'.avi' : 'video/x-msvideo',
543+
'.movie' : 'video/x-sgi-movie',
533544
}
534545

535546
# These are non-standard types, commonly found in the wild. They will
536547
# only match if strict=0 flag is given to the API methods.
537548

538549
# Please sort these too
539-
common_types = {
540-
'.jpg' : 'image/jpg',
541-
'.mid' : 'audio/midi',
550+
common_types = _common_types_default = {
551+
'.rtf' : 'application/rtf',
542552
'.midi': 'audio/midi',
553+
'.mid' : 'audio/midi',
554+
'.jpg' : 'image/jpg',
555+
'.pict': 'image/pict',
543556
'.pct' : 'image/pict',
544557
'.pic' : 'image/pict',
545-
'.pict': 'image/pict',
546-
'.rtf' : 'application/rtf',
547-
'.xul' : 'text/xul'
558+
'.xul' : 'text/xul',
548559
}
549560

550561

Lib/test/test_mimetypes.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,57 @@ def test_encoding(self):
7979
strict=True)
8080
self.assertEqual(exts, ['.g3', '.g\xb3'])
8181

82+
def test_init_reinitializes(self):
83+
# Issue 4963: make sure an init starts clean
84+
# First, put some poison into the types table
85+
mimetypes.add_type('foo/bar', '.foobar')
86+
self.assertEqual(mimetypes.guess_extension('foo/bar'), '.foobar')
87+
# Reinitialize
88+
mimetypes.init()
89+
# Poison should be gone.
90+
self.assertEqual(mimetypes.guess_extension('foo/bar'), None)
91+
92+
def test_preferred_extension(self):
93+
def check_extensions():
94+
self.assertEqual(mimetypes.guess_extension('application/octet-stream'), '.bin')
95+
self.assertEqual(mimetypes.guess_extension('application/postscript'), '.ps')
96+
self.assertEqual(mimetypes.guess_extension('application/vnd.apple.mpegurl'), '.m3u')
97+
self.assertEqual(mimetypes.guess_extension('application/vnd.ms-excel'), '.xls')
98+
self.assertEqual(mimetypes.guess_extension('application/vnd.ms-powerpoint'), '.ppt')
99+
self.assertEqual(mimetypes.guess_extension('application/x-texinfo'), '.texi')
100+
self.assertEqual(mimetypes.guess_extension('application/x-troff'), '.roff')
101+
self.assertEqual(mimetypes.guess_extension('application/xml'), '.xsl')
102+
self.assertEqual(mimetypes.guess_extension('audio/mpeg'), '.mp3')
103+
self.assertEqual(mimetypes.guess_extension('image/jpeg'), '.jpg')
104+
self.assertEqual(mimetypes.guess_extension('image/tiff'), '.tiff')
105+
self.assertEqual(mimetypes.guess_extension('message/rfc822'), '.eml')
106+
self.assertEqual(mimetypes.guess_extension('text/html'), '.html')
107+
self.assertEqual(mimetypes.guess_extension('text/plain'), '.txt')
108+
self.assertEqual(mimetypes.guess_extension('video/mpeg'), '.mpeg')
109+
self.assertEqual(mimetypes.guess_extension('video/quicktime'), '.mov')
110+
111+
check_extensions()
112+
mimetypes.init()
113+
check_extensions()
114+
115+
def test_init_stability(self):
116+
mimetypes.init()
117+
118+
suffix_map = mimetypes.suffix_map
119+
encodings_map = mimetypes.encodings_map
120+
types_map = mimetypes.types_map
121+
common_types = mimetypes.common_types
122+
123+
mimetypes.init()
124+
self.assertIsNot(suffix_map, mimetypes.suffix_map)
125+
self.assertIsNot(encodings_map, mimetypes.encodings_map)
126+
self.assertIsNot(types_map, mimetypes.types_map)
127+
self.assertIsNot(common_types, mimetypes.common_types)
128+
self.assertEqual(suffix_map, mimetypes.suffix_map)
129+
self.assertEqual(encodings_map, mimetypes.encodings_map)
130+
self.assertEqual(types_map, mimetypes.types_map)
131+
self.assertEqual(common_types, mimetypes.common_types)
132+
82133
def test_path_like_ob(self):
83134
filename = "LICENSE.txt"
84135
filepath = pathlib.Path(filename)

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy