diff --git a/Doc/library/sqlite3.rst b/Doc/library/sqlite3.rst
index e70d038e61d82d..852b68437a2653 100644
--- a/Doc/library/sqlite3.rst
+++ b/Doc/library/sqlite3.rst
@@ -748,6 +748,44 @@ Connection Objects

       .. versionadded:: 3.11

+   .. method:: serialize(*, name="main")
+
+      This method serializes a database into a :class:`bytes` object. For an
+      ordinary on-disk database file, the serialization is just a copy of the
+      disk file. For an in-memory database or a "temp" database, the
+      serialization is the same sequence of bytes which would be written to
+      disk if that database were backed up to disk.
+
+      *name* is the database to be serialized, and defaults to the main
+      database.
+
+      .. note::
+
+         This method is only available if the underlying SQLite library has the
+         serialize API.
+
+      .. versionadded:: 3.11
+
+
+   .. method:: deserialize(data, /, *, name="main")
+
+      This method causes the database connection to disconnect from database
+      *name*, and reopen *name* as an in-memory database based on the
+      serialization contained in *data*. Deserialization will raise
+      :exc:`OperationalError` if the database connection is currently involved
+      in a read transaction or a backup operation. :exc:`DataError` will be
+      raised if ``len(data)`` is larger than ``2**63 - 1``, and
+      :exc:`DatabaseError` will be raised if *data* does not contain a valid
+      SQLite database.
+
+      .. note::
+
+         This method is only available if the underlying SQLite library has the
+         deserialize API.
+
+      .. versionadded:: 3.11
+
+
 .. _sqlite3-cursor-objects:

 Cursor Objects
diff --git a/Doc/whatsnew/3.11.rst b/Doc/whatsnew/3.11.rst
index d0c10a91009970..c312645c31cd79 100644
--- a/Doc/whatsnew/3.11.rst
+++ b/Doc/whatsnew/3.11.rst
@@ -366,6 +366,11 @@ sqlite3
   Instead we leave it to the SQLite library to handle these cases.
   (Contributed by Erlend E. Aasland in :issue:`44092`.)

+* Add :meth:`~sqlite3.Connection.serialize` and
+  :meth:`~sqlite3.Connection.deserialize` to :class:`sqlite3.Connection` for
+  serializing and deserializing databases.
+  (Contributed by Erlend E. Aasland in :issue:`41930`.)
+
 sys
 ---
diff --git a/Lib/test/test_sqlite3/test_dbapi.py b/Lib/test/test_sqlite3/test_dbapi.py
index 177c2cd327ff3c..02482816cb9332 100644
--- a/Lib/test/test_sqlite3/test_dbapi.py
+++ b/Lib/test/test_sqlite3/test_dbapi.py
@@ -29,6 +29,7 @@
 from test.support import (
     SHORT_TIMEOUT,
+    bigmemtest,
     check_disallow_instantiation,
     threading_helper,
 )
@@ -603,6 +604,56 @@ def test_uninit_operations(self):
                              func)


+@unittest.skipUnless(hasattr(sqlite.Connection, "serialize"),
+                     "Needs SQLite serialize API")
+class SerializeTests(unittest.TestCase):
+    def test_serialize_deserialize(self):
+        with memory_database() as cx:
+            with cx:
+                cx.execute("create table t(t)")
+            data = cx.serialize()
+            self.assertEqual(len(data), 8192)
+
+            # Remove test table, verify that it was removed.
+            with cx:
+                cx.execute("drop table t")
+            regex = "no such table"
+            with self.assertRaisesRegex(sqlite.OperationalError, regex):
+                cx.execute("select t from t")
+
+            # Deserialize and verify that test table is restored.
+            cx.deserialize(data)
+            cx.execute("select t from t")
+
+    def test_deserialize_wrong_args(self):
+        dataset = (
+            (BufferError, memoryview(b"blob")[::2]),
+            (TypeError, []),
+            (TypeError, 1),
+            (TypeError, None),
+        )
+        for exc, arg in dataset:
+            with self.subTest(exc=exc, arg=arg):
+                with memory_database() as cx:
+                    self.assertRaises(exc, cx.deserialize, arg)
+
+    def test_deserialize_corrupt_database(self):
+        with memory_database() as cx:
+            regex = "file is not a database"
+            with self.assertRaisesRegex(sqlite.DatabaseError, regex):
+                cx.deserialize(b"\0\1\3")
+                # SQLite does not generate an error until you try to query the
+                # deserialized database.
+                cx.execute("create table fail(f)")
+
+    @unittest.skipUnless(sys.maxsize > 2**32, 'requires 64bit platform')
+    @bigmemtest(size=2**63, memuse=3, dry_run=False)
+    def test_deserialize_too_much_data_64bit(self, size):
+        with memory_database() as cx:
+            with self.assertRaisesRegex(OverflowError, "'data' is too large"):
+                cx.deserialize(b"b" * size)
+
+
 class OpenTests(unittest.TestCase):
     _sql = "create table test(id integer)"
@@ -1030,6 +1081,10 @@ def test_check_connection_thread(self):
             lambda: self.con.setlimit(sqlite.SQLITE_LIMIT_LENGTH, -1),
             lambda: self.con.getlimit(sqlite.SQLITE_LIMIT_LENGTH),
         ]
+        if hasattr(sqlite.Connection, "serialize"):
+            fns.append(lambda: self.con.serialize())
+            fns.append(lambda: self.con.deserialize(b""))
+
         for fn in fns:
             with self.subTest(fn=fn):
                 self._run_test(fn)
diff --git a/Misc/NEWS.d/next/Library/2021-06-17-00-02-58.bpo-41930.JS6fsd.rst b/Misc/NEWS.d/next/Library/2021-06-17-00-02-58.bpo-41930.JS6fsd.rst
new file mode 100644
index 00000000000000..ce494e7225e22c
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2021-06-17-00-02-58.bpo-41930.JS6fsd.rst
@@ -0,0 +1,3 @@
+Add :meth:`~sqlite3.Connection.serialize` and
+:meth:`~sqlite3.Connection.deserialize` support to :mod:`sqlite3`. Patch by
+Erlend E. Aasland.
diff --git a/Modules/_sqlite/clinic/connection.c.h b/Modules/_sqlite/clinic/connection.c.h
index 111e344fd2ae18..99ef94ecd71ecd 100644
--- a/Modules/_sqlite/clinic/connection.c.h
+++ b/Modules/_sqlite/clinic/connection.c.h
@@ -693,6 +693,156 @@ pysqlite_connection_create_collation(pysqlite_Connection *self, PyTypeObject *cl
     return return_value;
 }

+#if defined(PY_SQLITE_HAVE_SERIALIZE)
+
+PyDoc_STRVAR(serialize__doc__,
+"serialize($self, /, *, name=\'main\')\n"
+"--\n"
+"\n"
+"Serialize a database into a byte string.\n"
+"\n"
+"  name\n"
+"    Which database to serialize.\n"
+"\n"
+"For an ordinary on-disk database file, the serialization is just a copy of the\n"
+"disk file. For an in-memory database or a \"temp\" database, the serialization is\n"
+"the same sequence of bytes which would be written to disk if that database\n"
+"were backed up to disk.");
+
+#define SERIALIZE_METHODDEF    \
+    {"serialize", (PyCFunction)(void(*)(void))serialize, METH_FASTCALL|METH_KEYWORDS, serialize__doc__},
+
+static PyObject *
+serialize_impl(pysqlite_Connection *self, const char *name);
+
+static PyObject *
+serialize(pysqlite_Connection *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+    PyObject *return_value = NULL;
+    static const char * const _keywords[] = {"name", NULL};
+    static _PyArg_Parser _parser = {NULL, _keywords, "serialize", 0};
+    PyObject *argsbuf[1];
+    Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0;
+    const char *name = "main";
+
+    args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 0, 0, 0, argsbuf);
+    if (!args) {
+        goto exit;
+    }
+    if (!noptargs) {
+        goto skip_optional_kwonly;
+    }
+    if (!PyUnicode_Check(args[0])) {
+        _PyArg_BadArgument("serialize", "argument 'name'", "str", args[0]);
+        goto exit;
+    }
+    Py_ssize_t name_length;
+    name = PyUnicode_AsUTF8AndSize(args[0], &name_length);
+    if (name == NULL) {
+        goto exit;
+    }
+    if (strlen(name) != (size_t)name_length) {
+        PyErr_SetString(PyExc_ValueError, "embedded null character");
+        goto exit;
+    }
+skip_optional_kwonly:
+    return_value = serialize_impl(self, name);
+
+exit:
+    return return_value;
+}
+
+#endif /* defined(PY_SQLITE_HAVE_SERIALIZE) */
+
+#if defined(PY_SQLITE_HAVE_SERIALIZE)
+
+PyDoc_STRVAR(deserialize__doc__,
+"deserialize($self, data, /, *, name=\'main\')\n"
+"--\n"
+"\n"
+"Load a serialized database.\n"
+"\n"
+"  data\n"
+"    The serialized database content.\n"
+"  name\n"
+"    Which database to reopen with the deserialization.\n"
+"\n"
+"The deserialize interface causes the database connection to disconnect from the\n"
+"target database, and then reopen it as an in-memory database based on the given\n"
+"serialized data.\n"
+"\n"
+"The deserialize interface will fail with SQLITE_BUSY if the database is\n"
+"currently in a read transaction or is involved in a backup operation.");
+
+#define DESERIALIZE_METHODDEF    \
+    {"deserialize", (PyCFunction)(void(*)(void))deserialize, METH_FASTCALL|METH_KEYWORDS, deserialize__doc__},
+
+static PyObject *
+deserialize_impl(pysqlite_Connection *self, Py_buffer *data,
+                 const char *name);
+
+static PyObject *
+deserialize(pysqlite_Connection *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+    PyObject *return_value = NULL;
+    static const char * const _keywords[] = {"", "name", NULL};
+    static _PyArg_Parser _parser = {NULL, _keywords, "deserialize", 0};
+    PyObject *argsbuf[2];
+    Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1;
+    Py_buffer data = {NULL, NULL};
+    const char *name = "main";
+
+    args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf);
+    if (!args) {
+        goto exit;
+    }
+    if (PyUnicode_Check(args[0])) {
+        Py_ssize_t len;
+        const char *ptr = PyUnicode_AsUTF8AndSize(args[0], &len);
+        if (ptr == NULL) {
+            goto exit;
+        }
+        PyBuffer_FillInfo(&data, args[0], (void *)ptr, len, 1, 0);
+    }
+    else { /* any bytes-like object */
+        if (PyObject_GetBuffer(args[0], &data, PyBUF_SIMPLE) != 0) {
+            goto exit;
+        }
+        if (!PyBuffer_IsContiguous(&data, 'C')) {
+            _PyArg_BadArgument("deserialize", "argument 1", "contiguous buffer", args[0]);
+            goto exit;
+        }
+    }
+    if (!noptargs) {
+        goto skip_optional_kwonly;
+    }
+    if (!PyUnicode_Check(args[1])) {
+        _PyArg_BadArgument("deserialize", "argument 'name'", "str", args[1]);
+        goto exit;
+    }
+    Py_ssize_t name_length;
+    name = PyUnicode_AsUTF8AndSize(args[1], &name_length);
+    if (name == NULL) {
+        goto exit;
+    }
+    if (strlen(name) != (size_t)name_length) {
+        PyErr_SetString(PyExc_ValueError, "embedded null character");
+        goto exit;
+    }
+skip_optional_kwonly:
+    return_value = deserialize_impl(self, &data, name);
+
+exit:
+    /* Cleanup for data */
+    if (data.obj) {
+       PyBuffer_Release(&data);
+    }
+
+    return return_value;
+}
+
+#endif /* defined(PY_SQLITE_HAVE_SERIALIZE) */
+
 PyDoc_STRVAR(pysqlite_connection_enter__doc__,
 "__enter__($self, /)\n"
 "--\n"
@@ -832,4 +982,12 @@ getlimit(pysqlite_Connection *self, PyObject *arg)
 #ifndef PYSQLITE_CONNECTION_LOAD_EXTENSION_METHODDEF
     #define PYSQLITE_CONNECTION_LOAD_EXTENSION_METHODDEF
 #endif /* !defined(PYSQLITE_CONNECTION_LOAD_EXTENSION_METHODDEF) */
-/*[clinic end generated code: output=176c9095219b17c4 input=a9049054013a1b77]*/
+
+#ifndef SERIALIZE_METHODDEF
+    #define SERIALIZE_METHODDEF
+#endif /* !defined(SERIALIZE_METHODDEF) */
+
+#ifndef DESERIALIZE_METHODDEF
+    #define DESERIALIZE_METHODDEF
+#endif /* !defined(DESERIALIZE_METHODDEF) */
+/*[clinic end generated code: output=d965a68f9229a56c input=a9049054013a1b77]*/
diff --git a/Modules/_sqlite/connection.c b/Modules/_sqlite/connection.c
index 37f6d0fa5a502e..9d187cfa99d232 100644
--- a/Modules/_sqlite/connection.c
+++ b/Modules/_sqlite/connection.c
@@ -1818,6 +1818,125 @@ pysqlite_connection_create_collation_impl(pysqlite_Connection *self,
     Py_RETURN_NONE;
 }

+#ifdef PY_SQLITE_HAVE_SERIALIZE
+/*[clinic input]
+_sqlite3.Connection.serialize as serialize
+
+    *
+    name: str = "main"
+        Which database to serialize.
+
+Serialize a database into a byte string.
+
+For an ordinary on-disk database file, the serialization is just a copy of the
+disk file. For an in-memory database or a "temp" database, the serialization is
+the same sequence of bytes which would be written to disk if that database
+were backed up to disk.
+[clinic start generated code]*/
+
+static PyObject *
+serialize_impl(pysqlite_Connection *self, const char *name)
+/*[clinic end generated code: output=97342b0e55239dd3 input=d2eb5194a65abe2b]*/
+{
+    if (!pysqlite_check_thread(self) || !pysqlite_check_connection(self)) {
+        return NULL;
+    }
+
+    /* If SQLite has a contiguous memory representation of the database, we can
+     * avoid memory allocations, so we try with the no-copy flag first.
+     */
+    sqlite3_int64 size;
+    unsigned int flags = SQLITE_SERIALIZE_NOCOPY;
+    const char *data;
+
+    Py_BEGIN_ALLOW_THREADS
+    data = (const char *)sqlite3_serialize(self->db, name, &size, flags);
+    if (data == NULL) {
+        flags &= ~SQLITE_SERIALIZE_NOCOPY;
+        data = (const char *)sqlite3_serialize(self->db, name, &size, flags);
+    }
+    Py_END_ALLOW_THREADS
+
+    if (data == NULL) {
+        PyErr_Format(self->OperationalError, "unable to serialize '%s'",
+                     name);
+        return NULL;
+    }
+    PyObject *res = PyBytes_FromStringAndSize(data, size);
+    if (!(flags & SQLITE_SERIALIZE_NOCOPY)) {
+        sqlite3_free((void *)data);
+    }
+    return res;
+}
+
+/*[clinic input]
+_sqlite3.Connection.deserialize as deserialize
+
+    data: Py_buffer(accept={buffer, str})
+        The serialized database content.
+    /
+    *
+    name: str = "main"
+        Which database to reopen with the deserialization.
+
+Load a serialized database.
+
+The deserialize interface causes the database connection to disconnect from the
+target database, and then reopen it as an in-memory database based on the given
+serialized data.
+
+The deserialize interface will fail with SQLITE_BUSY if the database is
+currently in a read transaction or is involved in a backup operation.
+[clinic start generated code]*/
+
+static PyObject *
+deserialize_impl(pysqlite_Connection *self, Py_buffer *data,
+                 const char *name)
+/*[clinic end generated code: output=e394c798b98bad89 input=1be4ca1faacf28f2]*/
+{
+    if (!pysqlite_check_thread(self) || !pysqlite_check_connection(self)) {
+        return NULL;
+    }
+
+    /* Transfer ownership of the buffer to SQLite:
+     * - Move buffer from Py to SQLite
+     * - Tell SQLite to free buffer memory
+     * - Tell SQLite that it is permitted to grow the resulting database
+     *
+     * Make sure we don't overflow sqlite3_deserialize(); it accepts a signed
+     * 64-bit int as its data size argument.
+     *
+     * We can safely use sqlite3_malloc64 here, since it was introduced before
+     * the serialize APIs.
+     */
+    if (data->len > 9223372036854775807) {  // (1 << 63) - 1
+        PyErr_SetString(PyExc_OverflowError, "'data' is too large");
+        return NULL;
+    }
+
+    sqlite3_int64 size = (sqlite3_int64)data->len;
+    unsigned char *buf = sqlite3_malloc64(size);
+    if (buf == NULL) {
+        return PyErr_NoMemory();
+    }
+
+    const unsigned int flags = SQLITE_DESERIALIZE_FREEONCLOSE |
+                               SQLITE_DESERIALIZE_RESIZEABLE;
+    int rc;
+    Py_BEGIN_ALLOW_THREADS
+    (void)memcpy(buf, data->buf, data->len);
+    rc = sqlite3_deserialize(self->db, name, buf, size, size, flags);
+    Py_END_ALLOW_THREADS
+
+    if (rc != SQLITE_OK) {
+        (void)_pysqlite_seterror(self->state, self->db);
+        return NULL;
+    }
+    Py_RETURN_NONE;
+}
+#endif  // PY_SQLITE_HAVE_SERIALIZE
+
+
 /*[clinic input]
 _sqlite3.Connection.__enter__ as pysqlite_connection_enter
@@ -1971,6 +2090,8 @@ static PyMethodDef connection_methods[] = {
     PYSQLITE_CONNECTION_SET_TRACE_CALLBACK_METHODDEF
     SETLIMIT_METHODDEF
     GETLIMIT_METHODDEF
+    SERIALIZE_METHODDEF
+    DESERIALIZE_METHODDEF
     {NULL, NULL}
 };
diff --git a/PCbuild/_sqlite3.vcxproj b/PCbuild/_sqlite3.vcxproj
index e268c473f4c985..9cff43f73e5bec 100644
--- a/PCbuild/_sqlite3.vcxproj
+++ b/PCbuild/_sqlite3.vcxproj
@@ -94,6 +94,7 @@
       <AdditionalIncludeDirectories>$(sqlite3Dir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>PY_SQLITE_HAVE_SERIALIZE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
diff --git a/configure b/configure
index 44912b9c34df83..69b12309de5789 100755
--- a/configure
+++ b/configure
@@ -12902,6 +12902,50 @@ if test "x$ac_cv_lib_sqlite3_sqlite3_load_extension" = xyes; then :
 else
   have_sqlite3_load_extension=no
+fi
+
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for sqlite3_serialize in -lsqlite3" >&5
+$as_echo_n "checking for sqlite3_serialize in -lsqlite3... " >&6; }
+if ${ac_cv_lib_sqlite3_sqlite3_serialize+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lsqlite3  $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char sqlite3_serialize ();
+int
+main ()
+{
+return sqlite3_serialize ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  ac_cv_lib_sqlite3_sqlite3_serialize=yes
+else
+  ac_cv_lib_sqlite3_sqlite3_serialize=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_sqlite3_sqlite3_serialize" >&5
+$as_echo "$ac_cv_lib_sqlite3_sqlite3_serialize" >&6; }
+if test "x$ac_cv_lib_sqlite3_sqlite3_serialize" = xyes; then :
+
+
+$as_echo "#define PY_SQLITE_HAVE_SERIALIZE 1" >>confdefs.h
+
+
 fi
diff --git a/configure.ac b/configure.ac
index c02adf7bf3f140..5860595b752c8c 100644
--- a/configure.ac
+++ b/configure.ac
@@ -3605,6 +3605,12 @@ dnl hence CPPFLAGS instead of CFLAGS.
     [have_sqlite3_load_extension=yes],
     [have_sqlite3_load_extension=no]
   )
+  AC_CHECK_LIB([sqlite3], [sqlite3_serialize], [
+    AC_DEFINE(
+      [PY_SQLITE_HAVE_SERIALIZE], [1],
+      [Define if SQLite was compiled with the serialize API]
+    )
+  ])
 ], [
   have_supported_sqlite3=no
 ])
diff --git a/pyconfig.h.in b/pyconfig.h.in
index be776f734163f1..4ac054a28d8ef1 100644
--- a/pyconfig.h.in
+++ b/pyconfig.h.in
@@ -1506,6 +1506,9 @@
 /* Define to 1 to build the sqlite module with loadable extensions support.
    */
 #undef PY_SQLITE_ENABLE_LOAD_EXTENSION

+/* Define if SQLite was compiled with the serialize API */
+#undef PY_SQLITE_HAVE_SERIALIZE
+
 /* Default cipher suites list for ssl module. 1: Python's preferred selection,
    2: leave OpenSSL defaults untouched, 0: custom string */
 #undef PY_SSL_DEFAULT_CIPHERS
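
Usage sketch (not part of the patch): the snippet below illustrates how the new
Connection.serialize() and Connection.deserialize() methods documented in the
Doc/library/sqlite3.rst hunk above could be used. It assumes a Python 3.11
build whose SQLite library provides the serialize API; otherwise the methods
are not defined, hence the hasattr() guard, mirroring the check used in the
tests.

    import sqlite3

    src = sqlite3.connect(":memory:")
    with src:
        src.execute("create table t(x)")
        src.executemany("insert into t values (?)", [(1,), (2,), (3,)])

    if hasattr(src, "serialize"):
        # Snapshot the "main" database into a bytes object.
        data = src.serialize()

        # Reopen "main" of a fresh connection from that snapshot.
        dst = sqlite3.connect(":memory:")
        dst.deserialize(data)
        print(dst.execute("select count(*) from t").fetchone())  # (3,)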