From 6872edd4264b45f608dda459c6b37997d3c7429c Mon Sep 17 00:00:00 2001 From: Nice Zombies Date: Thu, 25 Apr 2024 11:07:38 +0200 Subject: [PATCH 1/5] gh-102511: Speed up os.path.splitroot() with native helpers (GH-118089) --- Include/internal/pycore_fileutils.h | 2 + Lib/ntpath.py | 116 ++++++++------ Lib/posixpath.py | 74 +++++---- Lib/test/test_ntpath.py | 1 + ...-04-19-08-50-48.gh-issue-102511.qDEB66.rst | 1 + Modules/clinic/posixmodule.c.h | 63 +++++++- Modules/posixmodule.c | 44 ++++++ Python/fileutils.c | 147 ++++++++++++++---- 8 files changed, 340 insertions(+), 108 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-04-19-08-50-48.gh-issue-102511.qDEB66.rst diff --git a/Include/internal/pycore_fileutils.h b/Include/internal/pycore_fileutils.h index 7c2b6ec0bffef5..899d3dacd8b04f 100644 --- a/Include/internal/pycore_fileutils.h +++ b/Include/internal/pycore_fileutils.h @@ -264,6 +264,8 @@ extern wchar_t *_Py_normpath_and_size(wchar_t *path, Py_ssize_t size, Py_ssize_t extern HRESULT PathCchSkipRoot(const wchar_t *pszPath, const wchar_t **ppszRootEnd); #endif /* defined(MS_WINDOWS_GAMES) && !defined(MS_WINDOWS_DESKTOP) */ +extern void _Py_skiproot(const wchar_t *path, Py_ssize_t size, Py_ssize_t *drvsize, Py_ssize_t *rootsize); + // Macros to protect CRT calls against instant termination when passed an // invalid parameter (bpo-23524). IPH stands for Invalid Parameter Handler. // Usage: diff --git a/Lib/ntpath.py b/Lib/ntpath.py index df3402d46c9cc6..a9155365283a23 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -176,56 +176,76 @@ def splitdrive(p): return drive, root + tail -def splitroot(p): - """Split a pathname into drive, root and tail. The drive is defined - exactly as in splitdrive(). On Windows, the root may be a single path - separator or an empty string. The tail contains anything after the root. - For example: - - splitroot('//server/share/') == ('//server/share', '/', '') - splitroot('C:/Users/Barney') == ('C:', '/', 'Users/Barney') - splitroot('C:///spam///ham') == ('C:', '/', '//spam///ham') - splitroot('Windows/notepad') == ('', '', 'Windows/notepad') - """ - p = os.fspath(p) - if isinstance(p, bytes): - sep = b'\\' - altsep = b'/' - colon = b':' - unc_prefix = b'\\\\?\\UNC\\' - empty = b'' - else: - sep = '\\' - altsep = '/' - colon = ':' - unc_prefix = '\\\\?\\UNC\\' - empty = '' - normp = p.replace(altsep, sep) - if normp[:1] == sep: - if normp[1:2] == sep: - # UNC drives, e.g. \\server\share or \\?\UNC\server\share - # Device drives, e.g. \\.\device or \\?\device - start = 8 if normp[:8].upper() == unc_prefix else 2 - index = normp.find(sep, start) - if index == -1: - return p, empty, empty - index2 = normp.find(sep, index + 1) - if index2 == -1: - return p, empty, empty - return p[:index2], p[index2:index2 + 1], p[index2 + 1:] +try: + from nt import _path_splitroot_ex +except ImportError: + def splitroot(p): + """Split a pathname into drive, root and tail. The drive is defined + exactly as in splitdrive(). On Windows, the root may be a single path + separator or an empty string. The tail contains anything after the root. + For example: + + splitroot('//server/share/') == ('//server/share', '/', '') + splitroot('C:/Users/Barney') == ('C:', '/', 'Users/Barney') + splitroot('C:///spam///ham') == ('C:', '/', '//spam///ham') + splitroot('Windows/notepad') == ('', '', 'Windows/notepad') + """ + p = os.fspath(p) + if isinstance(p, bytes): + sep = b'\\' + altsep = b'/' + colon = b':' + unc_prefix = b'\\\\?\\UNC\\' + empty = b'' else: - # Relative path with root, e.g. \Windows - return empty, p[:1], p[1:] - elif normp[1:2] == colon: - if normp[2:3] == sep: - # Absolute drive-letter path, e.g. X:\Windows - return p[:2], p[2:3], p[3:] + sep = '\\' + altsep = '/' + colon = ':' + unc_prefix = '\\\\?\\UNC\\' + empty = '' + normp = p.replace(altsep, sep) + if normp[:1] == sep: + if normp[1:2] == sep: + # UNC drives, e.g. \\server\share or \\?\UNC\server\share + # Device drives, e.g. \\.\device or \\?\device + start = 8 if normp[:8].upper() == unc_prefix else 2 + index = normp.find(sep, start) + if index == -1: + return p, empty, empty + index2 = normp.find(sep, index + 1) + if index2 == -1: + return p, empty, empty + return p[:index2], p[index2:index2 + 1], p[index2 + 1:] + else: + # Relative path with root, e.g. \Windows + return empty, p[:1], p[1:] + elif normp[1:2] == colon: + if normp[2:3] == sep: + # Absolute drive-letter path, e.g. X:\Windows + return p[:2], p[2:3], p[3:] + else: + # Relative path with drive, e.g. X:Windows + return p[:2], empty, p[2:] else: - # Relative path with drive, e.g. X:Windows - return p[:2], empty, p[2:] - else: - # Relative path, e.g. Windows - return empty, empty, p + # Relative path, e.g. Windows + return empty, empty, p +else: + def splitroot(p): + """Split a pathname into drive, root and tail. The drive is defined + exactly as in splitdrive(). On Windows, the root may be a single path + separator or an empty string. The tail contains anything after the root. + For example: + + splitroot('//server/share/') == ('//server/share', '/', '') + splitroot('C:/Users/Barney') == ('C:', '/', 'Users/Barney') + splitroot('C:///spam///ham') == ('C:', '/', '//spam///ham') + splitroot('Windows/notepad') == ('', '', 'Windows/notepad') + """ + p = os.fspath(p) + if isinstance(p, bytes): + drive, root, tail = _path_splitroot_ex(os.fsdecode(p)) + return os.fsencode(drive), os.fsencode(root), os.fsencode(tail) + return _path_splitroot_ex(p) # Split a path in head (everything up to the last '/') and tail (the diff --git a/Lib/posixpath.py b/Lib/posixpath.py index bd81c09bb2bdd0..6b18890fc93af7 100644 --- a/Lib/posixpath.py +++ b/Lib/posixpath.py @@ -135,33 +135,53 @@ def splitdrive(p): return p[:0], p -def splitroot(p): - """Split a pathname into drive, root and tail. On Posix, drive is always - empty; the root may be empty, a single slash, or two slashes. The tail - contains anything after the root. For example: - - splitroot('foo/bar') == ('', '', 'foo/bar') - splitroot('/foo/bar') == ('', '/', 'foo/bar') - splitroot('//foo/bar') == ('', '//', 'foo/bar') - splitroot('///foo/bar') == ('', '/', '//foo/bar') - """ - p = os.fspath(p) - if isinstance(p, bytes): - sep = b'/' - empty = b'' - else: - sep = '/' - empty = '' - if p[:1] != sep: - # Relative path, e.g.: 'foo' - return empty, empty, p - elif p[1:2] != sep or p[2:3] == sep: - # Absolute path, e.g.: '/foo', '///foo', '////foo', etc. - return empty, sep, p[1:] - else: - # Precisely two leading slashes, e.g.: '//foo'. Implementation defined per POSIX, see - # https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13 - return empty, p[:2], p[2:] +try: + from posix import _path_splitroot_ex +except ImportError: + def splitroot(p): + """Split a pathname into drive, root and tail. On Posix, drive is always + empty; the root may be empty, a single slash, or two slashes. The tail + contains anything after the root. For example: + + splitroot('foo/bar') == ('', '', 'foo/bar') + splitroot('/foo/bar') == ('', '/', 'foo/bar') + splitroot('//foo/bar') == ('', '//', 'foo/bar') + splitroot('///foo/bar') == ('', '/', '//foo/bar') + """ + p = os.fspath(p) + if isinstance(p, bytes): + sep = b'/' + empty = b'' + else: + sep = '/' + empty = '' + if p[:1] != sep: + # Relative path, e.g.: 'foo' + return empty, empty, p + elif p[1:2] != sep or p[2:3] == sep: + # Absolute path, e.g.: '/foo', '///foo', '////foo', etc. + return empty, sep, p[1:] + else: + # Precisely two leading slashes, e.g.: '//foo'. Implementation defined per POSIX, see + # https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13 + return empty, p[:2], p[2:] +else: + def splitroot(p): + """Split a pathname into drive, root and tail. On Posix, drive is always + empty; the root may be empty, a single slash, or two slashes. The tail + contains anything after the root. For example: + + splitroot('foo/bar') == ('', '', 'foo/bar') + splitroot('/foo/bar') == ('', '/', 'foo/bar') + splitroot('//foo/bar') == ('', '//', 'foo/bar') + splitroot('///foo/bar') == ('', '/', '//foo/bar') + """ + p = os.fspath(p) + if isinstance(p, bytes): + # Optimisation: the drive is always empty + _, root, tail = _path_splitroot_ex(os.fsdecode(p)) + return b'', os.fsencode(root), os.fsencode(tail) + return _path_splitroot_ex(p) # Return the tail (basename) part of a path, same as split(path)[1]. diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py index d91dcdfb0c5fac..f4d3f310b6b074 100644 --- a/Lib/test/test_ntpath.py +++ b/Lib/test/test_ntpath.py @@ -354,6 +354,7 @@ def test_normpath(self): tester("ntpath.normpath('\\\\foo\\')", '\\\\foo\\') tester("ntpath.normpath('\\\\foo')", '\\\\foo') tester("ntpath.normpath('\\\\')", '\\\\') + tester("ntpath.normpath('//?/UNC/server/share/..')", '\\\\?\\UNC\\server\\share\\') def test_realpath_curdir(self): expected = ntpath.normpath(os.getcwd()) diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-04-19-08-50-48.gh-issue-102511.qDEB66.rst b/Misc/NEWS.d/next/Core and Builtins/2024-04-19-08-50-48.gh-issue-102511.qDEB66.rst new file mode 100644 index 00000000000000..dfdf250710778e --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-04-19-08-50-48.gh-issue-102511.qDEB66.rst @@ -0,0 +1 @@ +Speed up :func:`os.path.splitroot` with a native implementation. diff --git a/Modules/clinic/posixmodule.c.h b/Modules/clinic/posixmodule.c.h index 3802182143c76e..f6b460bef5b16d 100644 --- a/Modules/clinic/posixmodule.c.h +++ b/Modules/clinic/posixmodule.c.h @@ -2208,6 +2208,67 @@ os__path_islink(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObj #endif /* defined(MS_WINDOWS) */ +PyDoc_STRVAR(os__path_splitroot_ex__doc__, +"_path_splitroot_ex($module, /, path)\n" +"--\n" +"\n"); + +#define OS__PATH_SPLITROOT_EX_METHODDEF \ + {"_path_splitroot_ex", _PyCFunction_CAST(os__path_splitroot_ex), METH_FASTCALL|METH_KEYWORDS, os__path_splitroot_ex__doc__}, + +static PyObject * +os__path_splitroot_ex_impl(PyObject *module, PyObject *path); + +static PyObject * +os__path_splitroot_ex(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(path), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"path", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "_path_splitroot_ex", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[1]; + PyObject *path; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf); + if (!args) { + goto exit; + } + if (!PyUnicode_Check(args[0])) { + _PyArg_BadArgument("_path_splitroot_ex", "argument 'path'", "str", args[0]); + goto exit; + } + if (PyUnicode_READY(args[0]) == -1) { + goto exit; + } + path = args[0]; + return_value = os__path_splitroot_ex_impl(module, path); + +exit: + return return_value; +} + PyDoc_STRVAR(os__path_normpath__doc__, "_path_normpath($module, /, path)\n" "--\n" @@ -11999,4 +12060,4 @@ os_waitstatus_to_exitcode(PyObject *module, PyObject *const *args, Py_ssize_t na #ifndef OS_WAITSTATUS_TO_EXITCODE_METHODDEF #define OS_WAITSTATUS_TO_EXITCODE_METHODDEF #endif /* !defined(OS_WAITSTATUS_TO_EXITCODE_METHODDEF) */ -/*[clinic end generated code: output=56e83d6b7cac0d58 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=a0b0e187a6d1c331 input=a9049054013a1b77]*/ diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 23cfcdeca246e1..8d561c7cb4c62e 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -5286,6 +5286,49 @@ os__path_islink_impl(PyObject *module, PyObject *path) #endif /* MS_WINDOWS */ +/*[clinic input] +os._path_splitroot_ex + + path: unicode + +[clinic start generated code]*/ + +static PyObject * +os__path_splitroot_ex_impl(PyObject *module, PyObject *path) +/*[clinic end generated code: output=de97403d3dfebc40 input=f1470e12d899f9ac]*/ +{ + Py_ssize_t len, drvsize, rootsize; + PyObject *drv = NULL, *root = NULL, *tail = NULL, *result = NULL; + + wchar_t *buffer = PyUnicode_AsWideCharString(path, &len); + if (!buffer) { + goto exit; + } + + _Py_skiproot(buffer, len, &drvsize, &rootsize); + drv = PyUnicode_FromWideChar(buffer, drvsize); + if (drv == NULL) { + goto exit; + } + root = PyUnicode_FromWideChar(&buffer[drvsize], rootsize); + if (root == NULL) { + goto exit; + } + tail = PyUnicode_FromWideChar(&buffer[drvsize + rootsize], + len - drvsize - rootsize); + if (tail == NULL) { + goto exit; + } + result = Py_BuildValue("(OOO)", drv, root, tail); +exit: + PyMem_Free(buffer); + Py_XDECREF(drv); + Py_XDECREF(root); + Py_XDECREF(tail); + return result; +} + + /*[clinic input] os._path_normpath @@ -16029,6 +16072,7 @@ static PyMethodDef posix_methods[] = { OS__GETFINALPATHNAME_METHODDEF OS__GETVOLUMEPATHNAME_METHODDEF OS__PATH_SPLITROOT_METHODDEF + OS__PATH_SPLITROOT_EX_METHODDEF OS__PATH_NORMPATH_METHODDEF OS_GETLOADAVG_METHODDEF OS_URANDOM_METHODDEF diff --git a/Python/fileutils.c b/Python/fileutils.c index c7521751cd26d1..a715de8907dcd7 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -2292,6 +2292,99 @@ PathCchCombineEx(wchar_t *buffer, size_t bufsize, const wchar_t *dirname, #endif /* defined(MS_WINDOWS_GAMES) && !defined(MS_WINDOWS_DESKTOP) */ +void +_Py_skiproot(const wchar_t *path, Py_ssize_t size, Py_ssize_t *drvsize, + Py_ssize_t *rootsize) +{ + assert(drvsize); + assert(rootsize); +#ifndef MS_WINDOWS +#define IS_SEP(x) (*(x) == SEP) + *drvsize = 0; + if (!IS_SEP(&path[0])) { + // Relative path, e.g.: 'foo' + *rootsize = 0; + } + else if (!IS_SEP(&path[1]) || IS_SEP(&path[2])) { + // Absolute path, e.g.: '/foo', '///foo', '////foo', etc. + *rootsize = 1; + } + else { + // Precisely two leading slashes, e.g.: '//foo'. Implementation defined per POSIX, see + // https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13 + *rootsize = 2; + } +#undef IS_SEP +#else + const wchar_t *pEnd = size >= 0 ? &path[size] : NULL; +#define IS_END(x) (pEnd ? (x) == pEnd : !*(x)) +#define IS_SEP(x) (*(x) == SEP || *(x) == ALTSEP) +#define SEP_OR_END(x) (IS_SEP(x) || IS_END(x)) + if (IS_SEP(&path[0])) { + if (IS_SEP(&path[1])) { + // Device drives, e.g. \\.\device or \\?\device + // UNC drives, e.g. \\server\share or \\?\UNC\server\share + Py_ssize_t idx; + if (path[2] == L'?' && IS_SEP(&path[3]) && + (path[4] == L'U' || path[4] == L'u') && + (path[5] == L'N' || path[5] == L'n') && + (path[6] == L'C' || path[6] == L'c') && + IS_SEP(&path[7])) + { + idx = 8; + } + else { + idx = 2; + } + while (!SEP_OR_END(&path[idx])) { + idx++; + } + if (IS_END(&path[idx])) { + *drvsize = idx; + *rootsize = 0; + } + else { + idx++; + while (!SEP_OR_END(&path[idx])) { + idx++; + } + *drvsize = idx; + if (IS_END(&path[idx])) { + *rootsize = 0; + } + else { + *rootsize = 1; + } + } + } + else { + // Relative path with root, e.g. \Windows + *drvsize = 0; + *rootsize = 1; + } + } + else if (!IS_END(&path[0]) && path[1] == L':') { + *drvsize = 2; + if (IS_SEP(&path[2])) { + // Absolute drive-letter path, e.g. X:\Windows + *rootsize = 1; + } + else { + // Relative path with drive, e.g. X:Windows + *rootsize = 0; + } + } + else { + // Relative path, e.g. Windows + *drvsize = 0; + *rootsize = 0; + } +#undef SEP_OR_END +#undef IS_SEP +#undef IS_END +#endif +} + // The caller must ensure "buffer" is big enough. static int join_relfile(wchar_t *buffer, size_t bufsize, @@ -2408,49 +2501,39 @@ _Py_normpath_and_size(wchar_t *path, Py_ssize_t size, Py_ssize_t *normsize) #endif #define SEP_OR_END(x) (IS_SEP(x) || IS_END(x)) - // Skip leading '.\' if (p1[0] == L'.' && IS_SEP(&p1[1])) { + // Skip leading '.\' path = &path[2]; - while (IS_SEP(path) && !IS_END(path)) { + while (IS_SEP(path)) { path++; } p1 = p2 = minP2 = path; lastC = SEP; } + else { + Py_ssize_t drvsize, rootsize; + _Py_skiproot(path, size, &drvsize, &rootsize); + if (drvsize || rootsize) { + // Skip past root and update minP2 + p1 = &path[drvsize + rootsize]; +#ifndef ALTSEP + p2 = p1; +#else + for (; p2 < p1; ++p2) { + if (*p2 == ALTSEP) { + *p2 = SEP; + } + } +#endif + minP2 = p2 - 1; + lastC = *minP2; #ifdef MS_WINDOWS - // Skip past drive segment and update minP2 - else if (p1[0] && p1[1] == L':') { - *p2++ = *p1++; - *p2++ = *p1++; - minP2 = p2; - lastC = L':'; - } - // Skip past all \\-prefixed paths, including \\?\, \\.\, - // and network paths, including the first segment. - else if (IS_SEP(&p1[0]) && IS_SEP(&p1[1])) { - int sepCount = 2; - *p2++ = SEP; - *p2++ = SEP; - p1 += 2; - for (; !IS_END(p1) && sepCount; ++p1) { - if (IS_SEP(p1)) { - --sepCount; - *p2++ = lastC = SEP; - } else { - *p2++ = lastC = *p1; + if (lastC != SEP) { + minP2++; } +#endif } - minP2 = p2 - 1; - } -#else - // Skip past two leading SEPs - else if (IS_SEP(&p1[0]) && IS_SEP(&p1[1]) && !IS_SEP(&p1[2])) { - *p2++ = *p1++; - *p2++ = *p1++; - minP2 = p2 - 1; // Absolute path has SEP at minP2 - lastC = SEP; } -#endif /* MS_WINDOWS */ /* if pEnd is specified, check that. Else, check for null terminator */ for (; !IS_END(p1); ++p1) { From d51eea6b919b8a838e8891b6ba5e659b6c0d6c23 Mon Sep 17 00:00:00 2001 From: Nineteendo Date: Tue, 28 May 2024 12:39:28 +0200 Subject: [PATCH 2/5] Direct C call for `os.path.splitroot()` --- Lib/ntpath.py | 32 ++++------------------------ Lib/posixpath.py | 31 ++++----------------------- Lib/test/test_ntpath.py | 2 ++ Lib/test/test_posixpath.py | 2 ++ Modules/clinic/posixmodule.c.h | 23 ++++++++++---------- Modules/posixmodule.c | 38 ++++++++++++++++++++++------------ 6 files changed, 49 insertions(+), 79 deletions(-) diff --git a/Lib/ntpath.py b/Lib/ntpath.py index 87e40738a88d97..fe355b98907373 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -177,19 +177,12 @@ def splitdrive(p): try: - from nt import _path_splitroot_ex + from nt import _path_splitroot_ex as splitroot except ImportError: def splitroot(p): - """Split a pathname into drive, root and tail. The drive is defined - exactly as in splitdrive(). On Windows, the root may be a single path - separator or an empty string. The tail contains anything after the root. - For example: - - splitroot('//server/share/') == ('//server/share', '/', '') - splitroot('C:/Users/Barney') == ('C:', '/', 'Users/Barney') - splitroot('C:///spam///ham') == ('C:', '/', '//spam///ham') - splitroot('Windows/notepad') == ('', '', 'Windows/notepad') - """ + """Split a pathname into drive, root and tail. + + The tail contains anything after the root.""" p = os.fspath(p) if isinstance(p, bytes): sep = b'\\' @@ -229,23 +222,6 @@ def splitroot(p): else: # Relative path, e.g. Windows return empty, empty, p -else: - def splitroot(p): - """Split a pathname into drive, root and tail. The drive is defined - exactly as in splitdrive(). On Windows, the root may be a single path - separator or an empty string. The tail contains anything after the root. - For example: - - splitroot('//server/share/') == ('//server/share', '/', '') - splitroot('C:/Users/Barney') == ('C:', '/', 'Users/Barney') - splitroot('C:///spam///ham') == ('C:', '/', '//spam///ham') - splitroot('Windows/notepad') == ('', '', 'Windows/notepad') - """ - p = os.fspath(p) - if isinstance(p, bytes): - drive, root, tail = _path_splitroot_ex(os.fsdecode(p)) - return os.fsencode(drive), os.fsencode(root), os.fsencode(tail) - return _path_splitroot_ex(p) # Split a path in head (everything up to the last '/') and tail (the diff --git a/Lib/posixpath.py b/Lib/posixpath.py index 5be9087a7650ad..0a47f96be3fb03 100644 --- a/Lib/posixpath.py +++ b/Lib/posixpath.py @@ -136,18 +136,12 @@ def splitdrive(p): try: - from posix import _path_splitroot_ex + from posix import _path_splitroot_ex as splitroot except ImportError: def splitroot(p): - """Split a pathname into drive, root and tail. On Posix, drive is always - empty; the root may be empty, a single slash, or two slashes. The tail - contains anything after the root. For example: - - splitroot('foo/bar') == ('', '', 'foo/bar') - splitroot('/foo/bar') == ('', '/', 'foo/bar') - splitroot('//foo/bar') == ('', '//', 'foo/bar') - splitroot('///foo/bar') == ('', '/', '//foo/bar') - """ + """Split a pathname into drive, root and tail. + + The tail contains anything after the root.""" p = os.fspath(p) if isinstance(p, bytes): sep = b'/' @@ -165,23 +159,6 @@ def splitroot(p): # Precisely two leading slashes, e.g.: '//foo'. Implementation defined per POSIX, see # https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13 return empty, p[:2], p[2:] -else: - def splitroot(p): - """Split a pathname into drive, root and tail. On Posix, drive is always - empty; the root may be empty, a single slash, or two slashes. The tail - contains anything after the root. For example: - - splitroot('foo/bar') == ('', '', 'foo/bar') - splitroot('/foo/bar') == ('', '/', 'foo/bar') - splitroot('//foo/bar') == ('', '//', 'foo/bar') - splitroot('///foo/bar') == ('', '/', '//foo/bar') - """ - p = os.fspath(p) - if isinstance(p, bytes): - # Optimisation: the drive is always empty - _, root, tail = _path_splitroot_ex(os.fsdecode(p)) - return b'', os.fsencode(root), os.fsencode(tail) - return _path_splitroot_ex(p) # Return the tail (basename) part of a path, same as split(path)[1]. diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py index db8376bed7a72b..79aceff4d95f60 100644 --- a/Lib/test/test_ntpath.py +++ b/Lib/test/test_ntpath.py @@ -1011,6 +1011,8 @@ def test_fast_paths_in_use(self): # There are fast paths of these functions implemented in posixmodule.c. # Confirm that they are being used, and not the Python fallbacks in # genericpath.py. + self.assertTrue(os.path.splitroot is nt._path_splitroot_ex) + self.assertFalse(inspect.isfunction(os.path.splitroot)) self.assertTrue(os.path.normpath is nt._path_normpath) self.assertFalse(inspect.isfunction(os.path.normpath)) self.assertTrue(os.path.isdir is nt._path_isdir) diff --git a/Lib/test/test_posixpath.py b/Lib/test/test_posixpath.py index 932d8a35d31fa9..7d9787c2328d44 100644 --- a/Lib/test/test_posixpath.py +++ b/Lib/test/test_posixpath.py @@ -279,6 +279,8 @@ def test_isjunction(self): def test_fast_paths_in_use(self): # There are fast paths of these functions implemented in posixmodule.c. # Confirm that they are being used, and not the Python fallbacks + self.assertTrue(os.path.splitroot is posix._path_splitroot_ex) + self.assertFalse(inspect.isfunction(os.path.splitroot)) self.assertTrue(os.path.normpath is posix._path_normpath) self.assertFalse(inspect.isfunction(os.path.normpath)) diff --git a/Modules/clinic/posixmodule.c.h b/Modules/clinic/posixmodule.c.h index bdff5ae00fcedb..ca01d4fb36fb01 100644 --- a/Modules/clinic/posixmodule.c.h +++ b/Modules/clinic/posixmodule.c.h @@ -2221,13 +2221,16 @@ os__path_islink(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObj PyDoc_STRVAR(os__path_splitroot_ex__doc__, "_path_splitroot_ex($module, /, path)\n" "--\n" -"\n"); +"\n" +"Split a pathname into drive, root and tail.\n" +"\n" +"The tail contains anything after the root."); #define OS__PATH_SPLITROOT_EX_METHODDEF \ {"_path_splitroot_ex", _PyCFunction_CAST(os__path_splitroot_ex), METH_FASTCALL|METH_KEYWORDS, os__path_splitroot_ex__doc__}, static PyObject * -os__path_splitroot_ex_impl(PyObject *module, PyObject *path); +os__path_splitroot_ex_impl(PyObject *module, path_t *path); static PyObject * os__path_splitroot_ex(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -2259,23 +2262,21 @@ os__path_splitroot_ex(PyObject *module, PyObject *const *args, Py_ssize_t nargs, }; #undef KWTUPLE PyObject *argsbuf[1]; - PyObject *path; + path_t path = PATH_T_INITIALIZE("_path_splitroot_ex", "path", 0, 1, 1, 0, 0); args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf); if (!args) { goto exit; } - if (!PyUnicode_Check(args[0])) { - _PyArg_BadArgument("_path_splitroot_ex", "argument 'path'", "str", args[0]); - goto exit; - } - if (PyUnicode_READY(args[0]) == -1) { + if (!path_converter(args[0], &path)) { goto exit; } - path = args[0]; - return_value = os__path_splitroot_ex_impl(module, path); + return_value = os__path_splitroot_ex_impl(module, &path); exit: + /* Cleanup for path */ + path_cleanup(&path); + return return_value; } @@ -12075,4 +12076,4 @@ os_waitstatus_to_exitcode(PyObject *module, PyObject *const *args, Py_ssize_t na #ifndef OS_WAITSTATUS_TO_EXITCODE_METHODDEF #define OS_WAITSTATUS_TO_EXITCODE_METHODDEF #endif /* !defined(OS_WAITSTATUS_TO_EXITCODE_METHODDEF) */ -/*[clinic end generated code: output=66b9ec0427c8ff23 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=96bef929fd0eccb4 input=a9049054013a1b77]*/ diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index f16395828b6362..45bf272e79fa6f 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -5255,23 +5255,22 @@ os__path_islink_impl(PyObject *module, path_t *path) /*[clinic input] os._path_splitroot_ex - path: unicode + path: path_t(make_wide=True, nonstrict=True) + +Split a pathname into drive, root and tail. +The tail contains anything after the root. [clinic start generated code]*/ static PyObject * -os__path_splitroot_ex_impl(PyObject *module, PyObject *path) -/*[clinic end generated code: output=de97403d3dfebc40 input=f1470e12d899f9ac]*/ +os__path_splitroot_ex_impl(PyObject *module, path_t *path) +/*[clinic end generated code: output=4b0072b6cdf4b611 input=6eb76e9173412c92]*/ { - Py_ssize_t len, drvsize, rootsize; + Py_ssize_t drvsize, rootsize; PyObject *drv = NULL, *root = NULL, *tail = NULL, *result = NULL; - wchar_t *buffer = PyUnicode_AsWideCharString(path, &len); - if (!buffer) { - goto exit; - } - - _Py_skiproot(buffer, len, &drvsize, &rootsize); + const wchar_t *buffer = path->wide; + _Py_skiproot(buffer, path->length, &drvsize, &rootsize); drv = PyUnicode_FromWideChar(buffer, drvsize); if (drv == NULL) { goto exit; @@ -5281,13 +5280,26 @@ os__path_splitroot_ex_impl(PyObject *module, PyObject *path) goto exit; } tail = PyUnicode_FromWideChar(&buffer[drvsize + rootsize], - len - drvsize - rootsize); + path->length - drvsize - rootsize); if (tail == NULL) { goto exit; } - result = Py_BuildValue("(OOO)", drv, root, tail); + if (PyBytes_Check(path->object)) { + Py_SETREF(drv, PyUnicode_EncodeFSDefault(drv)); + if (drv == NULL) { + goto exit; + } + Py_SETREF(root, PyUnicode_EncodeFSDefault(root)); + if (root == NULL) { + goto exit; + } + Py_SETREF(tail, PyUnicode_EncodeFSDefault(tail)); + if (tail == NULL) { + goto exit; + } + } + result = PyTuple_Pack(3, drv, root, tail); exit: - PyMem_Free(buffer); Py_XDECREF(drv); Py_XDECREF(root); Py_XDECREF(tail); From 0b9547f6cc38908f2e44b93fe35b2e3343677bf1 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Fri, 31 May 2024 14:43:27 +0000 Subject: [PATCH 3/5] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../2024-05-31-14-43-25.gh-issue-102511.kl4x8H.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-05-31-14-43-25.gh-issue-102511.kl4x8H.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-05-31-14-43-25.gh-issue-102511.kl4x8H.rst b/Misc/NEWS.d/next/Core and Builtins/2024-05-31-14-43-25.gh-issue-102511.kl4x8H.rst new file mode 100644 index 00000000000000..b395bad0c82fb6 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-05-31-14-43-25.gh-issue-102511.kl4x8H.rst @@ -0,0 +1 @@ +Fix :func:`os.path.normpath` for UNC paths on Windows. From e800e8573bdf0e49deb92ab2c949f853572c54cb Mon Sep 17 00:00:00 2001 From: Nice Zombies Date: Fri, 31 May 2024 16:43:49 +0200 Subject: [PATCH 4/5] Delete Misc/NEWS.d/next/Core and Builtins/2024-04-19-08-50-48.gh-issue-102511.qDEB66.rst --- .../2024-04-19-08-50-48.gh-issue-102511.qDEB66.rst | 1 - 1 file changed, 1 deletion(-) delete mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-04-19-08-50-48.gh-issue-102511.qDEB66.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-04-19-08-50-48.gh-issue-102511.qDEB66.rst b/Misc/NEWS.d/next/Core and Builtins/2024-04-19-08-50-48.gh-issue-102511.qDEB66.rst deleted file mode 100644 index dfdf250710778e..00000000000000 --- a/Misc/NEWS.d/next/Core and Builtins/2024-04-19-08-50-48.gh-issue-102511.qDEB66.rst +++ /dev/null @@ -1 +0,0 @@ -Speed up :func:`os.path.splitroot` with a native implementation. From 9b930a5f9cf390d2e761e5c99951ed256a166fe1 Mon Sep 17 00:00:00 2001 From: Nice Zombies Date: Wed, 19 Jun 2024 13:17:48 +0200 Subject: [PATCH 5/5] Rename 2024-05-31-14-43-25.gh-issue-102511.kl4x8H.rst to 2024-05-31-14-43-25.gh-issue-102511.kl4x8H.rst --- .../2024-05-31-14-43-25.gh-issue-102511.kl4x8H.rst | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename Misc/NEWS.d/next/{Core and Builtins => Library}/2024-05-31-14-43-25.gh-issue-102511.kl4x8H.rst (100%) diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-05-31-14-43-25.gh-issue-102511.kl4x8H.rst b/Misc/NEWS.d/next/Library/2024-05-31-14-43-25.gh-issue-102511.kl4x8H.rst similarity index 100% rename from Misc/NEWS.d/next/Core and Builtins/2024-05-31-14-43-25.gh-issue-102511.kl4x8H.rst rename to Misc/NEWS.d/next/Library/2024-05-31-14-43-25.gh-issue-102511.kl4x8H.rst pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy