diff --git a/Include/internal/pycore_fileutils.h b/Include/internal/pycore_fileutils.h index 7c2b6ec0bffef5..899d3dacd8b04f 100644 --- a/Include/internal/pycore_fileutils.h +++ b/Include/internal/pycore_fileutils.h @@ -264,6 +264,8 @@ extern wchar_t *_Py_normpath_and_size(wchar_t *path, Py_ssize_t size, Py_ssize_t extern HRESULT PathCchSkipRoot(const wchar_t *pszPath, const wchar_t **ppszRootEnd); #endif /* defined(MS_WINDOWS_GAMES) && !defined(MS_WINDOWS_DESKTOP) */ +extern void _Py_skiproot(const wchar_t *path, Py_ssize_t size, Py_ssize_t *drvsize, Py_ssize_t *rootsize); + // Macros to protect CRT calls against instant termination when passed an // invalid parameter (bpo-23524). IPH stands for Invalid Parameter Handler. // Usage: diff --git a/Lib/ntpath.py b/Lib/ntpath.py index 2e290dcf9de9da..fe355b98907373 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -176,56 +176,52 @@ def splitdrive(p): return drive, root + tail -def splitroot(p): - """Split a pathname into drive, root and tail. The drive is defined - exactly as in splitdrive(). On Windows, the root may be a single path - separator or an empty string. The tail contains anything after the root. - For example: - - splitroot('//server/share/') == ('//server/share', '/', '') - splitroot('C:/Users/Barney') == ('C:', '/', 'Users/Barney') - splitroot('C:///spam///ham') == ('C:', '/', '//spam///ham') - splitroot('Windows/notepad') == ('', '', 'Windows/notepad') - """ - p = os.fspath(p) - if isinstance(p, bytes): - sep = b'\\' - altsep = b'/' - colon = b':' - unc_prefix = b'\\\\?\\UNC\\' - empty = b'' - else: - sep = '\\' - altsep = '/' - colon = ':' - unc_prefix = '\\\\?\\UNC\\' - empty = '' - normp = p.replace(altsep, sep) - if normp[:1] == sep: - if normp[1:2] == sep: - # UNC drives, e.g. \\server\share or \\?\UNC\server\share - # Device drives, e.g. \\.\device or \\?\device - start = 8 if normp[:8].upper() == unc_prefix else 2 - index = normp.find(sep, start) - if index == -1: - return p, empty, empty - index2 = normp.find(sep, index + 1) - if index2 == -1: - return p, empty, empty - return p[:index2], p[index2:index2 + 1], p[index2 + 1:] +try: + from nt import _path_splitroot_ex as splitroot +except ImportError: + def splitroot(p): + """Split a pathname into drive, root and tail. + + The tail contains anything after the root.""" + p = os.fspath(p) + if isinstance(p, bytes): + sep = b'\\' + altsep = b'/' + colon = b':' + unc_prefix = b'\\\\?\\UNC\\' + empty = b'' else: - # Relative path with root, e.g. \Windows - return empty, p[:1], p[1:] - elif normp[1:2] == colon: - if normp[2:3] == sep: - # Absolute drive-letter path, e.g. X:\Windows - return p[:2], p[2:3], p[3:] + sep = '\\' + altsep = '/' + colon = ':' + unc_prefix = '\\\\?\\UNC\\' + empty = '' + normp = p.replace(altsep, sep) + if normp[:1] == sep: + if normp[1:2] == sep: + # UNC drives, e.g. \\server\share or \\?\UNC\server\share + # Device drives, e.g. \\.\device or \\?\device + start = 8 if normp[:8].upper() == unc_prefix else 2 + index = normp.find(sep, start) + if index == -1: + return p, empty, empty + index2 = normp.find(sep, index + 1) + if index2 == -1: + return p, empty, empty + return p[:index2], p[index2:index2 + 1], p[index2 + 1:] + else: + # Relative path with root, e.g. \Windows + return empty, p[:1], p[1:] + elif normp[1:2] == colon: + if normp[2:3] == sep: + # Absolute drive-letter path, e.g. X:\Windows + return p[:2], p[2:3], p[3:] + else: + # Relative path with drive, e.g. X:Windows + return p[:2], empty, p[2:] else: - # Relative path with drive, e.g. X:Windows - return p[:2], empty, p[2:] - else: - # Relative path, e.g. Windows - return empty, empty, p + # Relative path, e.g. Windows + return empty, empty, p # Split a path in head (everything up to the last '/') and tail (the diff --git a/Lib/posixpath.py b/Lib/posixpath.py index f1e4237b3aa0e4..0a47f96be3fb03 100644 --- a/Lib/posixpath.py +++ b/Lib/posixpath.py @@ -135,33 +135,30 @@ def splitdrive(p): return p[:0], p -def splitroot(p): - """Split a pathname into drive, root and tail. On Posix, drive is always - empty; the root may be empty, a single slash, or two slashes. The tail - contains anything after the root. For example: - - splitroot('foo/bar') == ('', '', 'foo/bar') - splitroot('/foo/bar') == ('', '/', 'foo/bar') - splitroot('//foo/bar') == ('', '//', 'foo/bar') - splitroot('///foo/bar') == ('', '/', '//foo/bar') - """ - p = os.fspath(p) - if isinstance(p, bytes): - sep = b'/' - empty = b'' - else: - sep = '/' - empty = '' - if p[:1] != sep: - # Relative path, e.g.: 'foo' - return empty, empty, p - elif p[1:2] != sep or p[2:3] == sep: - # Absolute path, e.g.: '/foo', '///foo', '////foo', etc. - return empty, sep, p[1:] - else: - # Precisely two leading slashes, e.g.: '//foo'. Implementation defined per POSIX, see - # https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13 - return empty, p[:2], p[2:] +try: + from posix import _path_splitroot_ex as splitroot +except ImportError: + def splitroot(p): + """Split a pathname into drive, root and tail. + + The tail contains anything after the root.""" + p = os.fspath(p) + if isinstance(p, bytes): + sep = b'/' + empty = b'' + else: + sep = '/' + empty = '' + if p[:1] != sep: + # Relative path, e.g.: 'foo' + return empty, empty, p + elif p[1:2] != sep or p[2:3] == sep: + # Absolute path, e.g.: '/foo', '///foo', '////foo', etc. + return empty, sep, p[1:] + else: + # Precisely two leading slashes, e.g.: '//foo'. Implementation defined per POSIX, see + # https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13 + return empty, p[:2], p[2:] # Return the tail (basename) part of a path, same as split(path)[1]. diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py index 9c6715b2cc37b4..79aceff4d95f60 100644 --- a/Lib/test/test_ntpath.py +++ b/Lib/test/test_ntpath.py @@ -354,6 +354,7 @@ def test_normpath(self): tester("ntpath.normpath('\\\\foo\\')", '\\\\foo\\') tester("ntpath.normpath('\\\\foo')", '\\\\foo') tester("ntpath.normpath('\\\\')", '\\\\') + tester("ntpath.normpath('//?/UNC/server/share/..')", '\\\\?\\UNC\\server\\share\\') def test_realpath_curdir(self): expected = ntpath.normpath(os.getcwd()) @@ -1010,6 +1011,8 @@ def test_fast_paths_in_use(self): # There are fast paths of these functions implemented in posixmodule.c. # Confirm that they are being used, and not the Python fallbacks in # genericpath.py. + self.assertTrue(os.path.splitroot is nt._path_splitroot_ex) + self.assertFalse(inspect.isfunction(os.path.splitroot)) self.assertTrue(os.path.normpath is nt._path_normpath) self.assertFalse(inspect.isfunction(os.path.normpath)) self.assertTrue(os.path.isdir is nt._path_isdir) diff --git a/Lib/test/test_posixpath.py b/Lib/test/test_posixpath.py index cc4fd2f4c9512c..c8f559b9e06b89 100644 --- a/Lib/test/test_posixpath.py +++ b/Lib/test/test_posixpath.py @@ -279,6 +279,8 @@ def test_isjunction(self): def test_fast_paths_in_use(self): # There are fast paths of these functions implemented in posixmodule.c. # Confirm that they are being used, and not the Python fallbacks + self.assertTrue(os.path.splitroot is posix._path_splitroot_ex) + self.assertFalse(inspect.isfunction(os.path.splitroot)) self.assertTrue(os.path.normpath is posix._path_normpath) self.assertFalse(inspect.isfunction(os.path.normpath)) diff --git a/Misc/NEWS.d/next/Library/2024-05-31-14-43-25.gh-issue-102511.kl4x8H.rst b/Misc/NEWS.d/next/Library/2024-05-31-14-43-25.gh-issue-102511.kl4x8H.rst new file mode 100644 index 00000000000000..b395bad0c82fb6 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-05-31-14-43-25.gh-issue-102511.kl4x8H.rst @@ -0,0 +1 @@ +Fix :func:`os.path.normpath` for UNC paths on Windows. diff --git a/Modules/clinic/posixmodule.c.h b/Modules/clinic/posixmodule.c.h index a33461dc5600dd..e4bdc70b09b222 100644 --- a/Modules/clinic/posixmodule.c.h +++ b/Modules/clinic/posixmodule.c.h @@ -2248,6 +2248,68 @@ os__path_islink(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObj #endif /* defined(MS_WINDOWS) */ +PyDoc_STRVAR(os__path_splitroot_ex__doc__, +"_path_splitroot_ex($module, /, path)\n" +"--\n" +"\n" +"Split a pathname into drive, root and tail.\n" +"\n" +"The tail contains anything after the root."); + +#define OS__PATH_SPLITROOT_EX_METHODDEF \ + {"_path_splitroot_ex", _PyCFunction_CAST(os__path_splitroot_ex), METH_FASTCALL|METH_KEYWORDS, os__path_splitroot_ex__doc__}, + +static PyObject * +os__path_splitroot_ex_impl(PyObject *module, path_t *path); + +static PyObject * +os__path_splitroot_ex(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(path), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"path", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "_path_splitroot_ex", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[1]; + path_t path = PATH_T_INITIALIZE("_path_splitroot_ex", "path", 0, 1, 1, 0, 0); + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf); + if (!args) { + goto exit; + } + if (!path_converter(args[0], &path)) { + goto exit; + } + return_value = os__path_splitroot_ex_impl(module, &path); + +exit: + /* Cleanup for path */ + path_cleanup(&path); + + return return_value; +} + PyDoc_STRVAR(os__path_normpath__doc__, "_path_normpath($module, /, path)\n" "--\n" @@ -12032,4 +12094,4 @@ os_waitstatus_to_exitcode(PyObject *module, PyObject *const *args, Py_ssize_t na #ifndef OS_WAITSTATUS_TO_EXITCODE_METHODDEF #define OS_WAITSTATUS_TO_EXITCODE_METHODDEF #endif /* !defined(OS_WAITSTATUS_TO_EXITCODE_METHODDEF) */ -/*[clinic end generated code: output=6d34c4564aca7725 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=89eb0b9b741fd1c4 input=a9049054013a1b77]*/ diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index b8558cc2265a58..614462d3d920d7 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -5282,6 +5282,61 @@ os__path_islink_impl(PyObject *module, path_t *path) #endif /* MS_WINDOWS */ +/*[clinic input] +os._path_splitroot_ex + + path: path_t(make_wide=True, nonstrict=True) + +Split a pathname into drive, root and tail. + +The tail contains anything after the root. +[clinic start generated code]*/ + +static PyObject * +os__path_splitroot_ex_impl(PyObject *module, path_t *path) +/*[clinic end generated code: output=4b0072b6cdf4b611 input=6eb76e9173412c92]*/ +{ + Py_ssize_t drvsize, rootsize; + PyObject *drv = NULL, *root = NULL, *tail = NULL, *result = NULL; + + const wchar_t *buffer = path->wide; + _Py_skiproot(buffer, path->length, &drvsize, &rootsize); + drv = PyUnicode_FromWideChar(buffer, drvsize); + if (drv == NULL) { + goto exit; + } + root = PyUnicode_FromWideChar(&buffer[drvsize], rootsize); + if (root == NULL) { + goto exit; + } + tail = PyUnicode_FromWideChar(&buffer[drvsize + rootsize], + path->length - drvsize - rootsize); + if (tail == NULL) { + goto exit; + } + if (PyBytes_Check(path->object)) { + Py_SETREF(drv, PyUnicode_EncodeFSDefault(drv)); + if (drv == NULL) { + goto exit; + } + Py_SETREF(root, PyUnicode_EncodeFSDefault(root)); + if (root == NULL) { + goto exit; + } + Py_SETREF(tail, PyUnicode_EncodeFSDefault(tail)); + if (tail == NULL) { + goto exit; + } + } + result = PyTuple_Pack(3, drv, root, tail); +exit: + Py_XDECREF(drv); + Py_XDECREF(root); + Py_XDECREF(tail); + return result; +} + + /*[clinic input] os._path_normpath @@ -16054,6 +16109,7 @@ static PyMethodDef posix_methods[] = { OS__GETFINALPATHNAME_METHODDEF OS__GETVOLUMEPATHNAME_METHODDEF OS__PATH_SPLITROOT_METHODDEF + OS__PATH_SPLITROOT_EX_METHODDEF OS__PATH_NORMPATH_METHODDEF OS_GETLOADAVG_METHODDEF OS_URANDOM_METHODDEF diff --git a/Python/fileutils.c b/Python/fileutils.c index c7521751cd26d1..a715de8907dcd7 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -2292,6 +2292,99 @@ PathCchCombineEx(wchar_t *buffer, size_t bufsize, const wchar_t *dirname, #endif /* defined(MS_WINDOWS_GAMES) && !defined(MS_WINDOWS_DESKTOP) */ +void +_Py_skiproot(const wchar_t *path, Py_ssize_t size, Py_ssize_t *drvsize, + Py_ssize_t *rootsize) +{ + assert(drvsize); + assert(rootsize); +#ifndef MS_WINDOWS +#define IS_SEP(x) (*(x) == SEP) + *drvsize = 0; + if (!IS_SEP(&path[0])) { + // Relative path, e.g.: 'foo' + *rootsize = 0; + } + else if (!IS_SEP(&path[1]) || IS_SEP(&path[2])) { + // Absolute path, e.g.: '/foo', '///foo', '////foo', etc. + *rootsize = 1; + } + else { + // Precisely two leading slashes, e.g.: '//foo'. Implementation defined per POSIX, see + // https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13 + *rootsize = 2; + } +#undef IS_SEP +#else + const wchar_t *pEnd = size >= 0 ? &path[size] : NULL; +#define IS_END(x) (pEnd ? (x) == pEnd : !*(x)) +#define IS_SEP(x) (*(x) == SEP || *(x) == ALTSEP) +#define SEP_OR_END(x) (IS_SEP(x) || IS_END(x)) + if (IS_SEP(&path[0])) { + if (IS_SEP(&path[1])) { + // Device drives, e.g. \\.\device or \\?\device + // UNC drives, e.g. \\server\share or \\?\UNC\server\share + Py_ssize_t idx; + if (path[2] == L'?' && IS_SEP(&path[3]) && + (path[4] == L'U' || path[4] == L'u') && + (path[5] == L'N' || path[5] == L'n') && + (path[6] == L'C' || path[6] == L'c') && + IS_SEP(&path[7])) + { + idx = 8; + } + else { + idx = 2; + } + while (!SEP_OR_END(&path[idx])) { + idx++; + } + if (IS_END(&path[idx])) { + *drvsize = idx; + *rootsize = 0; + } + else { + idx++; + while (!SEP_OR_END(&path[idx])) { + idx++; + } + *drvsize = idx; + if (IS_END(&path[idx])) { + *rootsize = 0; + } + else { + *rootsize = 1; + } + } + } + else { + // Relative path with root, e.g. \Windows + *drvsize = 0; + *rootsize = 1; + } + } + else if (!IS_END(&path[0]) && path[1] == L':') { + *drvsize = 2; + if (IS_SEP(&path[2])) { + // Absolute drive-letter path, e.g. X:\Windows + *rootsize = 1; + } + else { + // Relative path with drive, e.g. X:Windows + *rootsize = 0; + } + } + else { + // Relative path, e.g. Windows + *drvsize = 0; + *rootsize = 0; + } +#undef SEP_OR_END +#undef IS_SEP +#undef IS_END +#endif +} + // The caller must ensure "buffer" is big enough. static int join_relfile(wchar_t *buffer, size_t bufsize, @@ -2408,49 +2501,39 @@ _Py_normpath_and_size(wchar_t *path, Py_ssize_t size, Py_ssize_t *normsize) #endif #define SEP_OR_END(x) (IS_SEP(x) || IS_END(x)) - // Skip leading '.\' if (p1[0] == L'.' && IS_SEP(&p1[1])) { + // Skip leading '.\' path = &path[2]; - while (IS_SEP(path) && !IS_END(path)) { + while (IS_SEP(path)) { path++; } p1 = p2 = minP2 = path; lastC = SEP; } + else { + Py_ssize_t drvsize, rootsize; + _Py_skiproot(path, size, &drvsize, &rootsize); + if (drvsize || rootsize) { + // Skip past root and update minP2 + p1 = &path[drvsize + rootsize]; +#ifndef ALTSEP + p2 = p1; +#else + for (; p2 < p1; ++p2) { + if (*p2 == ALTSEP) { + *p2 = SEP; + } + } +#endif + minP2 = p2 - 1; + lastC = *minP2; #ifdef MS_WINDOWS - // Skip past drive segment and update minP2 - else if (p1[0] && p1[1] == L':') { - *p2++ = *p1++; - *p2++ = *p1++; - minP2 = p2; - lastC = L':'; - } - // Skip past all \\-prefixed paths, including \\?\, \\.\, - // and network paths, including the first segment. - else if (IS_SEP(&p1[0]) && IS_SEP(&p1[1])) { - int sepCount = 2; - *p2++ = SEP; - *p2++ = SEP; - p1 += 2; - for (; !IS_END(p1) && sepCount; ++p1) { - if (IS_SEP(p1)) { - --sepCount; - *p2++ = lastC = SEP; - } else { - *p2++ = lastC = *p1; + if (lastC != SEP) { + minP2++; } +#endif } - minP2 = p2 - 1; - } -#else - // Skip past two leading SEPs - else if (IS_SEP(&p1[0]) && IS_SEP(&p1[1]) && !IS_SEP(&p1[2])) { - *p2++ = *p1++; - *p2++ = *p1++; - minP2 = p2 - 1; // Absolute path has SEP at minP2 - lastC = SEP; } -#endif /* MS_WINDOWS */ /* if pEnd is specified, check that. Else, check for null terminator */ for (; !IS_END(p1); ++p1) {
Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.
Alternative Proxies: