diff --git a/Doc/library/http.server.rst b/Doc/library/http.server.rst index 1197b575c00455..540479f606fa07 100644 --- a/Doc/library/http.server.rst +++ b/Doc/library/http.server.rst @@ -206,24 +206,32 @@ provides three different variants: .. versionadded:: 3.2 - .. method:: send_error(code, message=None, explain=None) + .. method:: send_error(code, message=None, explain=None, *, extra_headers=()) Sends and logs a complete error reply to the client. The numeric *code* specifies the HTTP error code, with *message* as an optional, short, human readable description of the error. The *explain* argument can be used to provide more detailed information about the error; it will be formatted using the :attr:`error_message_format` attribute and emitted, after - a complete set of headers, as the response body. The :attr:`responses` - attribute holds the default values for *message* and *explain* that - will be used if no value is provided; for unknown codes the default value - for both is the string ``???``. The body will be empty if the method is - HEAD or the response code is one of the following: :samp:`1{xx}`, - ``204 No Content``, ``205 Reset Content``, ``304 Not Modified``. + a complete set of headers, as the response body. + + The *extra_headers* argument can be a key-value tuple list which + specifies additional headers to be sent in the response (for + instance, ``[("Content-Range", "bytes 3-14/42")]``). + + The :attr:`responses` attribute holds the default values for *message* + and *explain* that will be used if no value is provided; for unknown codes + the default value for both is the string ``???``. The body will be empty if + the method is HEAD or the response code is one of the following: :samp:`1{xx}`, + ``204 No Content``, ``205 Reset Content``, or ``304 Not Modified``. .. versionchanged:: 3.4 The error response includes a Content-Length header. Added the *explain* argument. + .. versionchanged:: next + Added the *extra_headers* argument. + .. 
method:: send_response(code, message=None) Adds a response header to the headers buffer and logs the accepted @@ -334,6 +342,10 @@ provides three different variants: .. versionchanged:: 3.9 The *directory* parameter accepts a :term:`path-like object`. + .. versionchanged:: next + Added support for HTTP single-part range requests on files, as specified + in :rfc:`9110#section-14`. + A lot of the work, such as parsing the request, is done by the base class :class:`BaseHTTPRequestHandler`. This class implements the :func:`do_GET` and :func:`do_HEAD` functions. diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 474bd6a24b8ece..bb03ba60607c91 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -433,6 +433,11 @@ http module allow the browser to apply its default dark mode. (Contributed by Yorik Hansen in :gh:`123430`.) +* Added support for HTTP single-part range requests on files to + :class:`~http.server.SimpleHTTPRequestHandler`, as specified in + :rfc:`9110#section-14`. + (Contributed by Andy Ling in :gh:`86809`.) + inspect ------- @@ -1102,6 +1107,10 @@ Changes in the Python API This temporary change affects other threads. (Contributed by Serhiy Storchaka in :gh:`69998`.) +* The :meth:`BaseHTTPRequestHandler.send_error <http.server.BaseHTTPRequestHandler.send_error>` + method has a new optional parameter *extra_headers*. + Subclasses should update their implementations to accept this new parameter. + (Contributed by Andy Ling in :gh:`86809`.) 
Build changes ============= diff --git a/Lib/http/server.py b/Lib/http/server.py index a90c8d34c394db..432713f05f2258 100644 --- a/Lib/http/server.py +++ b/Lib/http/server.py @@ -97,6 +97,7 @@ import mimetypes import os import posixpath +import re import select import shutil import socket @@ -131,6 +132,7 @@ """ DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8" +RANGE_REGEX_PATTERN = re.compile(r'bytes=(\d*)-(\d*)$', re.ASCII | re.IGNORECASE) class HTTPServer(socketserver.TCPServer): @@ -443,7 +445,7 @@ def handle(self): while not self.close_connection: self.handle_one_request() - def send_error(self, code, message=None, explain=None): + def send_error(self, code, message=None, explain=None, *, extra_headers=()): """Send and log an error reply. Arguments are @@ -454,6 +456,7 @@ def send_error(self, code, message=None, explain=None): defaults to short entry matching the response code * explain: a detailed message defaults to the long entry matching the response code. + * extra_headers: extra headers to be included in the response This sends an error response (so it must be called before any output has been generated), logs the error, and finally sends @@ -491,6 +494,8 @@ def send_error(self, code, message=None, explain=None): body = content.encode('UTF-8', 'replace') self.send_header("Content-Type", self.error_content_type) self.send_header('Content-Length', str(len(body))) + for name, value in extra_headers: + self.send_header(name, value) self.end_headers() if self.command != 'HEAD' and body: @@ -682,7 +687,7 @@ def do_GET(self): f = self.send_head() if f: try: - self.copyfile(f, self.wfile) + self.copyfile(f, self.wfile, range=self._range) finally: f.close() @@ -705,6 +710,7 @@ def send_head(self): """ path = self.translate_path(self.path) f = None + self._range = self.parse_range() if os.path.isdir(path): parts = urllib.parse.urlsplit(self.path) if not parts.path.endswith('/'): @@ -769,9 +775,44 @@ def send_head(self): f.close() return None - 
self.send_response(HTTPStatus.OK) + if self._range: + start, end = self._range + if start is None: + # parse_range() collapses (None, None) to None as it's invalid + # https://github.com/python/cpython/pull/118949#discussion_r1912397525 + assert end is not None + # `end` here means suffix length + start = max(0, fs.st_size - end) + end = fs.st_size - 1 + elif end is None or end >= fs.st_size: + end = fs.st_size - 1 + + if start == 0 and end >= fs.st_size - 1: + # Send entire file + self._range = None + elif start >= fs.st_size: + # 416 REQUESTED_RANGE_NOT_SATISFIABLE means that + # none of the range values overlap the extent of + # the resource + f.close() + headers = [('Content-Range', f'bytes */{fs.st_size}')] + self.send_error(HTTPStatus.REQUESTED_RANGE_NOT_SATISFIABLE, + extra_headers=headers) + return None + + if self._range: + self.send_response(HTTPStatus.PARTIAL_CONTENT) + self.send_header("Content-Range", + f"bytes {start}-{end}/{fs.st_size}") + self.send_header("Content-Length", str(end - start + 1)) + + # Update range to be sent to be used later in copyfile + self._range = (start, end) + else: + self.send_response(HTTPStatus.OK) + self.send_header("Accept-Ranges", "bytes") + self.send_header("Content-Length", str(fs.st_size)) self.send_header("Content-type", ctype) - self.send_header("Content-Length", str(fs[6])) self.send_header("Last-Modified", self.date_time_string(fs.st_mtime)) self.end_headers() @@ -868,13 +909,15 @@ def translate_path(self, path): path += '/' return path - def copyfile(self, source, outputfile): - """Copy all data between two file objects. + def copyfile(self, source, outputfile, *, range=None): + """Copy all data between two file objects if range is None. + Otherwise, copy data between two file objects based on the + inclusive range (start, end). 
The SOURCE argument is a file object open for reading - (or anything with a read() method) and the DESTINATION - argument is a file object open for writing (or - anything with a write() method). + (or anything with read() and seek() method) and the + DESTINATION argument is a file object open for writing + (or anything with a write() method). The only reason for overriding this would be to change the block size or perhaps to replace newlines by CRLF @@ -882,7 +925,18 @@ def copyfile(self, source, outputfile): to copy binary data as well. """ - shutil.copyfileobj(source, outputfile) + if range is None: + shutil.copyfileobj(source, outputfile) + else: + start, end = range + length = end - start + 1 + source.seek(start) + while length > 0: + buf = source.read(min(length, shutil.COPY_BUFSIZE)) + if not buf: + raise EOFError('File shrank after size was checked') + length -= len(buf) + outputfile.write(buf) def guess_type(self, path): """Guess the type of a file. @@ -909,6 +963,34 @@ def guess_type(self, path): return guess return 'application/octet-stream' + def parse_range(self): + """Return a tuple of (start, end) representing the range header in + the HTTP request. If the range header is missing or not resolvable, + this returns None. + + This currently only supports single part ranges. 
+ + """ + range_header = self.headers.get('range') + if range_header is None: + return None + m = RANGE_REGEX_PATTERN.match(range_header) + # Ignore invalid Range header and return None + # https://datatracker.ietf.org/doc/html/rfc9110#name-range + if m is None: + return None + + start = int(m.group(1)) if m.group(1) else None + end = int(m.group(2)) if m.group(2) else None + + if start is None and end is None: + return None + + if start is not None and end is not None and start > end: + return None + + return start, end + # Utilities for CGIHTTPRequestHandler diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py index 1c370dcafa9fea..d841da12b7effe 100644 --- a/Lib/test/test_httpservers.py +++ b/Lib/test/test_httpservers.py @@ -539,6 +539,83 @@ def test_get(self): finally: os.chmod(self.tempdir, 0o755) + def test_single_range_get(self): + route = self.base_url + '/test' + response = self.request(route) + self.assertEqual(response.getheader('accept-ranges'), 'bytes') + self.check_status_and_reason(response, HTTPStatus.OK, data=self.data) + + # valid ranges + response = self.request(route, headers={'Range': 'bYtEs=3-12'}) # case insensitive + self.assertEqual(response.getheader('content-range'), 'bytes 3-12/30') + self.assertEqual(response.getheader('content-length'), '10') + self.check_status_and_reason(response, HTTPStatus.PARTIAL_CONTENT, data=self.data[3:13]) + + response = self.request(route, headers={'Range': 'bytes=3-'}) + self.assertEqual(response.getheader('content-range'), 'bytes 3-29/30') + self.assertEqual(response.getheader('content-length'), '27') + self.check_status_and_reason(response, HTTPStatus.PARTIAL_CONTENT, data=self.data[3:]) + + response = self.request(route, headers={'Range': 'bytes=-5'}) + self.assertEqual(response.getheader('content-range'), 'bytes 25-29/30') + self.assertEqual(response.getheader('content-length'), '5') + self.check_status_and_reason(response, HTTPStatus.PARTIAL_CONTENT, data=self.data[25:]) + + response 
= self.request(route, headers={'Range': 'bytes=29-29'}) + self.assertEqual(response.getheader('content-range'), 'bytes 29-29/30') + self.assertEqual(response.getheader('content-length'), '1') + self.check_status_and_reason(response, HTTPStatus.PARTIAL_CONTENT, data=self.data[29:]) + + # end > file size + response = self.request(route, headers={'Range': 'bytes=25-100'}) + self.assertEqual(response.getheader('content-range'), 'bytes 25-29/30') + self.assertEqual(response.getheader('content-length'), '5') + self.check_status_and_reason(response, HTTPStatus.PARTIAL_CONTENT, data=self.data[25:]) + + # invalid ranges + response = self.request(route, headers={'Range': 'bytes=100-200'}) + self.assertEqual(response.getheader('content-range'), 'bytes */30') + self.check_status_and_reason(response, HTTPStatus.REQUESTED_RANGE_NOT_SATISFIABLE) + + response = self.request(route, headers={'Range': 'bytes=4-3'}) + self.check_status_and_reason(response, HTTPStatus.OK, data=self.data) + + response = self.request(route, headers={'Range': 'bytes=wrong format'}) + self.check_status_and_reason(response, HTTPStatus.OK, data=self.data) + + response = self.request(route, headers={'Range': 'bytes=-'}) + self.check_status_and_reason(response, HTTPStatus.OK, data=self.data) + + response = self.request(route, headers={'Range': 'bytes=--'}) + self.check_status_and_reason(response, HTTPStatus.OK, data=self.data) + + response = self.request(route, headers={'Range': 'bytes='}) + self.check_status_and_reason(response, HTTPStatus.OK, data=self.data) + + def test_single_range_get_empty(self): + # range requests to an empty file + os_helper.create_empty_file(os.path.join(self.tempdir_name, 'empty')) + empty_path = self.base_url + '/empty' + + response = self.request(empty_path, headers={'Range': 'bytes=0-512'}) + self.check_status_and_reason(response, HTTPStatus.OK, data=b'') + + response = self.request(empty_path, headers={'Range': 'bytes=-512'}) + self.check_status_and_reason(response, 
HTTPStatus.OK, data=b'') + + response = self.request(empty_path, headers={'Range': 'bytes=1-2'}) + self.assertEqual(response.getheader('content-range'), 'bytes */0') + self.check_status_and_reason(response, HTTPStatus.REQUESTED_RANGE_NOT_SATISFIABLE) + + # invalid Range header is always ignored + response = self.request(empty_path, headers={'Range': 'bytes=5-4'}) + self.check_status_and_reason(response, HTTPStatus.OK) + + def test_multi_range_get(self): + # multipart ranges (not supported currently) + response = self.request(self.base_url + '/test', headers={'Range': 'bytes=1-2, 4-7'}) + self.check_status_and_reason(response, HTTPStatus.OK, data=self.data) + def test_head(self): response = self.request( self.base_url + '/test', method='HEAD') diff --git a/Misc/NEWS.d/next/Library/2024-05-12-00-15-44.gh-issue-86809._5vdGa.rst b/Misc/NEWS.d/next/Library/2024-05-12-00-15-44.gh-issue-86809._5vdGa.rst new file mode 100644 index 00000000000000..deda1ee00b78aa --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-05-12-00-15-44.gh-issue-86809._5vdGa.rst @@ -0,0 +1 @@ +Added support for HTTP single-part range requests on files to :class:`~http.server.SimpleHTTPRequestHandler`, as specified in :rfc:`9110#section-14`. pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy