-
-
Notifications
You must be signed in to change notification settings - Fork 32.6k
bpo-29659: Expose copyfileobj() length
arg for public use
#328
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -73,8 +73,14 @@ class RegistryError(Exception): | |
and unpacking registries fails""" | ||
|
||
|
||
def copyfileobj(fsrc, fdst, length=16*1024): | ||
"""copy data from file-like object fsrc to file-like object fdst""" | ||
def copyfileobj(fsrc, fdst, length=None): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
`def copyfileobj(fsrc, fdst, length=16*1024):` I personally prefer this form in my code because it condenses two lines of code into one, compared with `if not length: length = 16 * 1024`. It looks a little nicer, not to mention that it saves lines of code that basically do the same thing. |
||
"""Copy data from file-like object `fsrc` to file-like object `fdst`. | ||
|
||
An in-memory buffer size in bytes can be set with `length`; the default is | ||
16 KiB. | ||
""" | ||
if not length: | ||
length = 16 * 1024 | ||
while 1: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm not sure that it's OK to use a loop if the length is negative. I suggest adding a special case for negative values that calls read() (with no argument) only once. |
||
buf = fsrc.read(length) | ||
if not buf: | ||
|
@@ -93,12 +99,14 @@ def _samefile(src, dst): | |
return (os.path.normcase(os.path.abspath(src)) == | ||
os.path.normcase(os.path.abspath(dst))) | ||
|
||
def copyfile(src, dst, *, follow_symlinks=True): | ||
def copyfile(src, dst, *, follow_symlinks=True, length=None): | ||
"""Copy data from src to dst. | ||
|
||
If follow_symlinks is not set and src is a symbolic link, a new | ||
symlink will be created instead of copying the file it points to. | ||
|
||
An in-memory buffer size in bytes can be set with `length`; the default is | ||
16 KiB. | ||
""" | ||
if _samefile(src, dst): | ||
raise SameFileError("{!r} and {!r} are the same file".format(src, dst)) | ||
|
@@ -119,7 +127,7 @@ def copyfile(src, dst, *, follow_symlinks=True): | |
else: | ||
with open(src, 'rb') as fsrc: | ||
with open(dst, 'wb') as fdst: | ||
copyfileobj(fsrc, fdst) | ||
copyfileobj(fsrc, fdst, length=length) | ||
return dst | ||
|
||
def copymode(src, dst, *, follow_symlinks=True): | ||
|
@@ -224,7 +232,7 @@ def lookup(name): | |
raise | ||
_copyxattr(src, dst, follow_symlinks=follow) | ||
|
||
def copy(src, dst, *, follow_symlinks=True): | ||
def copy(src, dst, *, follow_symlinks=True, length=None): | ||
"""Copy data and mode bits ("cp src dst"). Return the file's destination. | ||
|
||
The destination may be a directory. | ||
|
@@ -235,14 +243,17 @@ def copy(src, dst, *, follow_symlinks=True): | |
If source and destination are the same file, a SameFileError will be | ||
raised. | ||
|
||
An in-memory buffer size in bytes can be set with `length`; the default is | ||
16 KiB. | ||
|
||
""" | ||
if os.path.isdir(dst): | ||
dst = os.path.join(dst, os.path.basename(src)) | ||
copyfile(src, dst, follow_symlinks=follow_symlinks) | ||
copyfile(src, dst, follow_symlinks=follow_symlinks, length=length) | ||
copymode(src, dst, follow_symlinks=follow_symlinks) | ||
return dst | ||
|
||
def copy2(src, dst, *, follow_symlinks=True): | ||
def copy2(src, dst, *, follow_symlinks=True, length=None): | ||
"""Copy data and all stat info ("cp -p src dst"). Return the file's | ||
destination." | ||
|
||
|
@@ -251,10 +262,12 @@ def copy2(src, dst, *, follow_symlinks=True): | |
If follow_symlinks is false, symlinks won't be followed. This | ||
resembles GNU's "cp -P src dst". | ||
|
||
An in-memory buffer size in bytes can be set with `length`; the default is | ||
16 KiB. | ||
""" | ||
if os.path.isdir(dst): | ||
dst = os.path.join(dst, os.path.basename(src)) | ||
copyfile(src, dst, follow_symlinks=follow_symlinks) | ||
copyfile(src, dst, follow_symlinks=follow_symlinks, length=length) | ||
copystat(src, dst, follow_symlinks=follow_symlinks) | ||
return dst | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
"A negative length value means to copy the data without looping over the source data in chunks"
I dislike this definition. In practice, a negative value means an "unlimited" buffer size: the whole input file is loaded into memory.
I'm not sure that it's good practice to try to load files of unknown size into memory.
I suggest removing this feature, which seems more like a side effect than a carefully designed API.
If you want a fast copy, pass a very large length such as 1 GB. But if Python starts to load 1 TB into memory, it's likely to crash the system, or at least to slow it down a lot.