From 9344c664251bd761ee12532a318992b121d9c166 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sun, 6 Mar 2022 06:55:35 +0000 Subject: [PATCH 1/3] bpo-37609 - Support "UNC" and "GLOBAL" junctions in `ntpath.splitdrive()`. Co-authored-by: Eryk Sun --- Doc/library/os.path.rst | 2 +- Lib/ntpath.py | 207 ++++++++++++++---- Lib/test/test_ntpath.py | 27 ++- .../2022-03-06-07-03-02.bpo-37609.BkLg4n.rst | 2 + 4 files changed, 189 insertions(+), 49 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2022-03-06-07-03-02.bpo-37609.BkLg4n.rst diff --git a/Doc/library/os.path.rst b/Doc/library/os.path.rst index 6b15a113f54506..3cdd5ba4289f33 100644 --- a/Doc/library/os.path.rst +++ b/Doc/library/os.path.rst @@ -476,7 +476,7 @@ the :mod:`glob` module.) ("c:", "/dir") If the path contains a UNC path, drive will contain the host name - and share, up to but not including the fourth separator:: + and share:: >>> splitdrive("//host/computer/dir") ("//host/computer", "/dir") diff --git a/Lib/ntpath.py b/Lib/ntpath.py index 041ebc75cb127c..7aeb3081e6e2c5 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -122,54 +122,169 @@ def join(path, *paths): # colon) and the path specification. # It is always true that drivespec + pathspec == p def splitdrive(p): - """Split a pathname into drive/UNC sharepoint and relative path specifiers. - Returns a 2-tuple (drive_or_unc, path); either part may be empty. - - If you assign - result = splitdrive(p) - It is always true that: - result[0] + result[1] == p - - If the path contained a drive letter, drive_or_unc will contain everything - up to and including the colon. e.g. splitdrive("c:/dir") returns ("c:", "/dir") - - If the path contained a UNC path, the drive_or_unc will contain the host name - and share up to but not including the fourth directory separator character. - e.g. splitdrive("//host/computer/dir") returns ("//host/computer", "/dir") - - Paths cannot contain both a drive letter and a UNC path. 
- + """Split path p conservatively into a drive and remaining path. + Returns a 2-tuple, (drive, rest). Either component may be empty. + + If the source path contains a DOS drive (i.e. a letter plus a colon), the + remaining path is everything after the colon. + + DOS drive examples: + + splitdrive('C:') == ('C:', '') + splitdrive('C:dir') == ('C:', 'dir') + splitdrive('C:/') == ('C:', '/') + splitdrive('C:/dir') == ('C:', '/dir') + + A UNC path is parsed as follows: + + drive + vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv + "//" domain "/" junction ["/" junction] ["/" object] + ^^^^^^^^^^^^ + rest + + The UNC root must be exactly two separators. Other separators may be + repeated. + + This is a generalization of the UNC specification in [MS-DTYP] 2.2.57. The + latter specifies the file namespace, for which the domain is referred to + as "host-name" (more generally "server") and the junction as "share-name". + The server is commonly a local or remote network name (i.e. NETBIOS name, + DNS name, or IP address). It can also be a non-network server provided by + a local redirector. The share is a resource provided by the server, such + as a file-system directory. + + UNC drive examples in the file namespace: + + splitdrive('//server/share') == ('//server/share', '') + splitdrive('//server///share') == ('//server///share', '') + splitdrive('//server/share/') == ('//server/share', '/') + splitdrive('//server/share/dir') == ('//server/share', '/dir') + + The other supported namespace is the device namespace, which is mapped as + two domains, "." and "?". These domains are handled differently in some + contexts, such as when creating or opening a file, but for our purposes + here they are equivalent. In this namespace, the junction is case- + insensitive. Any device junction is recognized as a UNC drive, with + two exceptions that require additional qualification: "GLOBAL" and "UNC". 
+ + Normally the device namespace includes the local device junctions of a + user, such as mapped and subst drives. The "GLOBAL" junction limits this + view to just global devices. It must be followed either by a device + junction or another "GLOBAL" junction. + + The equivalent of the UNC file namespace in the device namespace is the + "UNC" device junction, but only when there is a remaining path (e.g. at + least a trailing separator). For consistency with the file namespace, if + the "UNC" device junction has a remaining path, it must include a server + and share in order to be recognized as a drive. + + UNC drive examples in the device namespace: + + splitdrive('//./C:') == ('//./C:', '') + splitdrive('//?/C:/dir') == ('//?/C:', '/dir') + + splitdrive('//./UNC') == ('//./UNC', '') + splitdrive('//?/UNC/server/share') == ('//?/UNC/server/share', '') + splitdrive('//?/UNC/server/share/dir') == ( + '//?/UNC/server/share', '/dir') + + splitdrive('//./Global/C:') == ('//./Global/C:', '') + splitdrive('//?/Global/Global/C:/') == ('//?/Global/Global/C:', '/') + splitdrive('//?/Global/UNC/server/share/dir') == ( + '//?/Global/UNC/server/share', '/dir') + + Examples with no drive: + + splitdrive('') == ('', '') + splitdrive('dir') == ('', 'dir') + splitdrive('/dir') == ('', '/dir') + + splitdrive('//') == ('', '//') + splitdrive('//server/') == ('', '//server/') + splitdrive('///server/share') == ('', '///server/share') + + splitdrive('//?/UNC/') == ('', '//?/UNC/') + splitdrive('//?/UNC/server/') == ('', '//?/UNC/server/') + splitdrive('//?/Global') == ('', '//?/Global') """ p = os.fspath(p) - if len(p) >= 2: - if isinstance(p, bytes): - sep = b'\\' - altsep = b'/' - colon = b':' - else: - sep = '\\' - altsep = '/' - colon = ':' - normp = p.replace(altsep, sep) - if (normp[0:2] == sep*2) and (normp[2:3] != sep): - # is a UNC path: - # vvvvvvvvvvvvvvvvvvvv drive letter or UNC path - # \\machine\mountpoint\directory\etc\... 
- # directory ^^^^^^^^^^^^^^^ - index = normp.find(sep, 2) - if index == -1: - return p[:0], p - index2 = normp.find(sep, index + 1) - # a UNC path can't have two slashes in a row - # (after the initial two) - if index2 == index + 1: - return p[:0], p - if index2 == -1: - index2 = len(p) - return p[:index2], p[index2:] - if normp[1:2] == colon: - return p[:2], p[2:] - return p[:0], p + if isinstance(p, bytes): + empty = b'' + colon = b':' + sep = b'\\' + altsep = b'/' + device_domains = (b'?', b'.') + global_name = b'GLOBAL' + unc_name = b'UNC' + else: + empty = '' + colon = ':' + sep = '\\' + altsep = '/' + device_domains = ('?', '.') + global_name = 'GLOBAL' + unc_name = 'UNC' + + # Check for a DOS drive. + if p[:1].isalpha() and p[1:2] == colon: + return p[:2], p[2:] + + # UNC drive for the file and device namespaces. + # \\domain\junction\object + # Separators may be repeated, except at the root. + + def _next(): + '''Get the next component, ignoring repeated separators.''' + i0 = index + while normp[i0:i0+1] == sep: + i0 += 1 + if i0 >= len(p): + return -1, len(p) + i1 = normp.find(sep, i0) + if i1 == -1: + i1 = len(p) + return i0, i1 + + index = 0 + normp = p.replace(altsep, sep) + # Consume the domain (server). + i, index = _next() + if i != 2: + return empty, p + domain = p[i:index] + # Consume the junction (share). + i, index = _next() + if i == -1: + return empty, p + + if domain not in device_domains: + return p[:index], p[index:] + + # GLOBAL and UNC are special in the device domain. + junction = p[i:index].upper() + # GLOBAL can be repeated. + while junction == global_name: + i, index = _next() + if i == -1: + # GLOBAL must be a prefix. + return empty, p + junction = p[i:index].upper() + + if junction == unc_name: + # Allow the "UNC" device with no remaining path. + if index == len(p): + return p, empty + # Consume the meta-domain (server). + i, index = _next() + if i == -1: + return empty, p + # Consume the meta-junction (share). 
+ i, index = _next() + if i == -1: + return empty, p + + return p[:index], p[index:] # Split a path in head (everything up to the last '/') and tail (the diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py index 99a77e3fb43dc8..d1f5091d3ba767 100644 --- a/Lib/test/test_ntpath.py +++ b/Lib/test/test_ntpath.py @@ -111,12 +111,35 @@ def test_splitdrive(self): tester('ntpath.splitdrive("///conky/mountpoint/foo/bar")', ('', '///conky/mountpoint/foo/bar')) tester('ntpath.splitdrive("\\\\conky\\\\mountpoint\\foo\\bar")', - ('', '\\\\conky\\\\mountpoint\\foo\\bar')) + ('\\\\conky\\\\mountpoint', '\\foo\\bar')) tester('ntpath.splitdrive("//conky//mountpoint/foo/bar")', - ('', '//conky//mountpoint/foo/bar')) + ('//conky//mountpoint', '/foo/bar')) # Issue #19911: UNC part containing U+0130 self.assertEqual(ntpath.splitdrive('//conky/MOUNTPOİNT/foo/bar'), ('//conky/MOUNTPOİNT', '/foo/bar')) + # bpo-37609: support UNC drives in the device namespace. + tester('ntpath.splitdrive("//./UNC")', ("//./UNC", "")) + tester('ntpath.splitdrive("//./Global/UNC")', ("//./Global/UNC", "")) + tester('ntpath.splitdrive("//./Global/Global/UNC")', ("//./Global/Global/UNC", "")) + tester('ntpath.splitdrive("//./Global")', ("", "//./Global")) + tester('ntpath.splitdrive("//?/UNC/")', ("", "//?/UNC/")) + tester('ntpath.splitdrive("//?/UNC/server/")', ("", "//?/UNC/server/")) + tester('ntpath.splitdrive("//?/UNC/server/share")',("//?/UNC/server/share", "")) + tester('ntpath.splitdrive("//?/UNC/server/share/dir")', ("//?/UNC/server/share", "/dir")) + tester('ntpath.splitdrive("//?/Global/UNC/server/share/dir")', + ("//?/Global/UNC/server/share", "/dir")) + tester('ntpath.splitdrive("\\\\.\\UNC")', ("\\\\.\\UNC", "")) + tester('ntpath.splitdrive("\\\\.\\Global\\UNC")', ("\\\\.\\Global\\UNC", "")) + tester('ntpath.splitdrive("\\\\.\\Global\\Global\\UNC")', + ("\\\\.\\Global\\Global\\UNC", "")) + tester('ntpath.splitdrive("\\\\.\\Global")', ("", "\\\\.\\Global")) + 
tester('ntpath.splitdrive("\\\\?\\UNC\\")', ("", "\\\\?\\UNC\\")) + tester('ntpath.splitdrive("\\\\?\\UNC\\server\\")', ("", "\\\\?\\UNC\\server\\")) + tester('ntpath.splitdrive("\\\\?\\UNC\\server\\share")',("\\\\?\\UNC\\server\\share", "")) + tester('ntpath.splitdrive("\\\\?\\UNC\\server\\share\\dir")', + ("\\\\?\\UNC\\server\\share", "\\dir")) + tester('ntpath.splitdrive("\\\\?\\Global\\UNC\\server\\share\\dir")', + ("\\\\?\\Global\\UNC\\server\\share", "\\dir")) def test_split(self): tester('ntpath.split("c:\\foo\\bar")', ('c:\\foo', 'bar')) diff --git a/Misc/NEWS.d/next/Library/2022-03-06-07-03-02.bpo-37609.BkLg4n.rst b/Misc/NEWS.d/next/Library/2022-03-06-07-03-02.bpo-37609.BkLg4n.rst new file mode 100644 index 00000000000000..a5e771fea28325 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-03-06-07-03-02.bpo-37609.BkLg4n.rst @@ -0,0 +1,2 @@ +:func:`os.path.splitdrive` now understands ``UNC`` and ``GLOBAL`` junctions +in Windows device paths. Contributed by Barney Gale and Eryk Sun. From 56b6f043a900746427c18dff82cfb20dd36135e5 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sun, 6 Mar 2022 07:46:42 +0000 Subject: [PATCH 2/3] Fix tests; remove requirement that drive letter is alphabetic. --- Lib/ntpath.py | 2 +- Lib/test/test_zipfile.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Lib/ntpath.py b/Lib/ntpath.py index 7aeb3081e6e2c5..22d500e0ad46d0 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -227,7 +227,7 @@ def splitdrive(p): unc_name = 'UNC' # Check for a DOS drive. - if p[:1].isalpha() and p[1:2] == colon: + if p[1:2] == colon: return p[:2], p[2:] # UNC drive for the file and device namespaces. 
diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py index de2dd33f436609..cdd2e9c174f95d 100644 --- a/Lib/test/test_zipfile.py +++ b/Lib/test/test_zipfile.py @@ -1457,10 +1457,10 @@ def test_extract_hackers_arcnames_windows_only(self): (r'C:\foo\bar', 'foo/bar'), (r'//conky/mountpoint/foo/bar', 'foo/bar'), (r'\\conky\mountpoint\foo\bar', 'foo/bar'), - (r'///conky/mountpoint/foo/bar', 'conky/mountpoint/foo/bar'), - (r'\\\conky\mountpoint\foo\bar', 'conky/mountpoint/foo/bar'), - (r'//conky//mountpoint/foo/bar', 'conky/mountpoint/foo/bar'), - (r'\\conky\\mountpoint\foo\bar', 'conky/mountpoint/foo/bar'), + (r'///conky/mountpoint/foo/bar', 'foo/bar'), + (r'\\\conky\mountpoint\foo\bar', 'foo/bar'), + (r'//conky//mountpoint/foo/bar', 'foo/bar'), + (r'\\conky\\mountpoint\foo\bar', 'foo/bar'), (r'//?/C:/foo/bar', 'foo/bar'), (r'\\?\C:\foo\bar', 'foo/bar'), (r'C:/../C:/foo/bar', 'C_/foo/bar'), From c173d45cf48cf763b4b4b77d1c0233cd9026a321 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sun, 6 Mar 2022 08:14:23 +0000 Subject: [PATCH 3/3] Fix tests (attempt 2!) --- Lib/test/test_zipfile.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py index cdd2e9c174f95d..6f9d5ebeb430ce 100644 --- a/Lib/test/test_zipfile.py +++ b/Lib/test/test_zipfile.py @@ -1457,8 +1457,8 @@ def test_extract_hackers_arcnames_windows_only(self): (r'C:\foo\bar', 'foo/bar'), (r'//conky/mountpoint/foo/bar', 'foo/bar'), (r'\\conky\mountpoint\foo\bar', 'foo/bar'), - (r'///conky/mountpoint/foo/bar', 'foo/bar'), - (r'\\\conky\mountpoint\foo\bar', 'foo/bar'), + (r'///conky/mountpoint/foo/bar', 'conky/mountpoint/foo/bar'), + (r'\\\conky\mountpoint\foo\bar', 'conky/mountpoint/foo/bar'), (r'//conky//mountpoint/foo/bar', 'foo/bar'), (r'\\conky\\mountpoint\foo\bar', 'foo/bar'), (r'//?/C:/foo/bar', 'foo/bar'), pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy