Skip to content

Commit 45138d3

Browse files
authored
gh-131876: extract _hashlib helpers into a separate directory (#136995)
The `Modules/hashlib.h` helper file is now removed and split into multiple files:

* `Modules/_hashlib/hashlib_buffer.[ch]` -- Utilities for getting a buffer view and handling buffer inputs.
* `Modules/_hashlib/hashlib_fetch.h` -- Utilities used when fetching a message digest from a digest-like identifier. Currently, this file only contains common error messages as the fetching API is not yet implemented.
* `Modules/_hashlib/hashlib_mutex.h` -- Utilities for managing the lock on cryptographic hash objects.
1 parent eefd70f commit 45138d3

20 files changed

+346
-221
lines changed

Makefile.pre.in

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,7 @@ ENSUREPIP= @ENSUREPIP@
227227
# Internal static libraries
228228
LIBMPDEC_A= Modules/_decimal/libmpdec/libmpdec.a
229229
LIBEXPAT_A= Modules/expat/libexpat.a
230+
LIBHASHLIB_INTERNAL_A=Modules/_hashlib/libhashlib.a
230231

231232
# HACL* build configuration
232233
LIBHACL_CFLAGS=@LIBHACL_CFLAGS@
@@ -761,6 +762,17 @@ LIBHACL_HMAC_HEADERS= \
761762
$(LIBHACL_BLAKE2_HEADERS) \
762763
$(LIBHACL_HEADERS)
763764

765+
##########################################################################
766+
# Internal library for cryptographic primitives
767+
768+
LIBHASHLIB_INTERNAL_OBJS= \
769+
Modules/_hashlib/hashlib_buffer.o
770+
771+
LIBHASHLIB_INTERNAL_HEADERS= \
772+
Modules/_hashlib/hashlib_buffer.h \
773+
Modules/_hashlib/hashlib_fetch.h \
774+
Modules/_hashlib/hashlib_mutex.h
775+
764776
#########################################################################
765777
# Rules
766778

@@ -1511,6 +1523,17 @@ $(LIBEXPAT_A): $(LIBEXPAT_OBJS)
15111523
-rm -f $@
15121524
$(AR) $(ARFLAGS) $@ $(LIBEXPAT_OBJS)
15131525

1526+
##########################################################################
1527+
# '_hashlib', '_hmac' and HACL*-based modules helpers
1528+
LIBHASHLIB_INTERNAL_CFLAGS=@LIBHASHLIB_INTERNAL_CFLAGS@ $(PY_STDMODULE_CFLAGS) $(CCSHARED)
1529+
1530+
Modules/_hashlib/hashlib_buffer.o: Modules/_hashlib/hashlib_buffer.c $(LIBHASHLIB_INTERNAL_HEADERS) $(PYTHON_HEADERS)
1531+
$(CC) -I$(srcdir)/Modules/_hashlib -c $(LIBHASHLIB_INTERNAL_CFLAGS) -o $@ $(srcdir)/Modules/_hashlib/hashlib_buffer.c
1532+
1533+
$(LIBHASHLIB_INTERNAL_A): $(LIBHASHLIB_INTERNAL_OBJS)
1534+
-rm -f $@
1535+
$(AR) $(ARFLAGS) $@ $(LIBHASHLIB_INTERNAL_OBJS)
1536+
15141537
##########################################################################
15151538
# HACL* library build
15161539
#
@@ -3353,21 +3376,21 @@ MODULE__CTYPES_TEST_DEPS=$(srcdir)/Modules/_ctypes/_ctypes_test_generated.c.h
33533376
MODULE__CTYPES_MALLOC_CLOSURE=@MODULE__CTYPES_MALLOC_CLOSURE@
33543377
MODULE__DECIMAL_DEPS=$(srcdir)/Modules/_decimal/docstrings.h @LIBMPDEC_INTERNAL@
33553378
MODULE__ELEMENTTREE_DEPS=$(srcdir)/Modules/pyexpat.c @LIBEXPAT_INTERNAL@
3356-
MODULE__HASHLIB_DEPS=$(srcdir)/Modules/hashlib.h
3379+
MODULE__HASHLIB_DEPS=@LIBHASHLIB_INTERNAL@
33573380
MODULE__IO_DEPS=$(srcdir)/Modules/_io/_iomodule.h
33583381

33593382
# HACL*-based cryptographic primitives
3360-
MODULE__MD5_DEPS=$(srcdir)/Modules/hashlib.h $(LIBHACL_MD5_HEADERS) $(LIBHACL_MD5_LIB_@LIBHACL_LDEPS_LIBTYPE@)
3383+
MODULE__MD5_DEPS=$(MODULE__HASHLIB_DEPS) $(LIBHACL_MD5_HEADERS) $(LIBHACL_MD5_LIB_@LIBHACL_LDEPS_LIBTYPE@)
33613384
MODULE__MD5_LDEPS=$(LIBHACL_MD5_LIB_@LIBHACL_LDEPS_LIBTYPE@)
3362-
MODULE__SHA1_DEPS=$(srcdir)/Modules/hashlib.h $(LIBHACL_SHA1_HEADERS) $(LIBHACL_SHA1_LIB_@LIBHACL_LDEPS_LIBTYPE@)
3385+
MODULE__SHA1_DEPS=$(MODULE__HASHLIB_DEPS) $(LIBHACL_SHA1_HEADERS) $(LIBHACL_SHA1_LIB_@LIBHACL_LDEPS_LIBTYPE@)
33633386
MODULE__SHA1_LDEPS=$(LIBHACL_SHA1_LIB_@LIBHACL_LDEPS_LIBTYPE@)
3364-
MODULE__SHA2_DEPS=$(srcdir)/Modules/hashlib.h $(LIBHACL_SHA2_HEADERS) $(LIBHACL_SHA2_LIB_@LIBHACL_LDEPS_LIBTYPE@)
3387+
MODULE__SHA2_DEPS=$(MODULE__HASHLIB_DEPS) $(LIBHACL_SHA2_HEADERS) $(LIBHACL_SHA2_LIB_@LIBHACL_LDEPS_LIBTYPE@)
33653388
MODULE__SHA2_LDEPS=$(LIBHACL_SHA2_LIB_@LIBHACL_LDEPS_LIBTYPE@)
3366-
MODULE__SHA3_DEPS=$(srcdir)/Modules/hashlib.h $(LIBHACL_SHA3_HEADERS) $(LIBHACL_SHA3_LIB_@LIBHACL_LDEPS_LIBTYPE@)
3389+
MODULE__SHA3_DEPS=$(MODULE__HASHLIB_DEPS) $(LIBHACL_SHA3_HEADERS) $(LIBHACL_SHA3_LIB_@LIBHACL_LDEPS_LIBTYPE@)
33673390
MODULE__SHA3_LDEPS=$(LIBHACL_SHA3_LIB_@LIBHACL_LDEPS_LIBTYPE@)
3368-
MODULE__BLAKE2_DEPS=$(srcdir)/Modules/hashlib.h $(LIBHACL_BLAKE2_HEADERS) $(LIBHACL_BLAKE2_LIB_@LIBHACL_LDEPS_LIBTYPE@)
3391+
MODULE__BLAKE2_DEPS=$(MODULE__HASHLIB_DEPS) $(LIBHACL_BLAKE2_HEADERS) $(LIBHACL_BLAKE2_LIB_@LIBHACL_LDEPS_LIBTYPE@)
33693392
MODULE__BLAKE2_LDEPS=$(LIBHACL_BLAKE2_LIB_@LIBHACL_LDEPS_LIBTYPE@)
3370-
MODULE__HMAC_DEPS=$(srcdir)/Modules/hashlib.h $(LIBHACL_HMAC_HEADERS) $(LIBHACL_HMAC_LIB_@LIBHACL_LDEPS_LIBTYPE@)
3393+
MODULE__HMAC_DEPS=$(MODULE__HASHLIB_DEPS) $(LIBHACL_HMAC_HEADERS) $(LIBHACL_HMAC_LIB_@LIBHACL_LDEPS_LIBTYPE@)
33713394
MODULE__HMAC_LDEPS=$(LIBHACL_HMAC_LIB_@LIBHACL_LDEPS_LIBTYPE@)
33723395

33733396
MODULE__SOCKET_DEPS=$(srcdir)/Modules/socketmodule.h $(srcdir)/Modules/addrinfo.h $(srcdir)/Modules/getaddrinfo.c $(srcdir)/Modules/getnameinfo.c
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Remove :file:`!Modules/hashlib.h` and move its content into dedicated files
2+
now located in ``Modules/_hashlib``. Patch by Bénédikt Tran.

Modules/_hashlib/hashlib_buffer.c

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
#include "hashlib_buffer.h"
2+
3+
/*
 * Select the object to hash from the mutually exclusive 'data' and 'string'
 * arguments and store it in '*res'.
 *
 * Return 1 when exactly one of them is given ('*res' is that same pointer;
 * no reference count is changed here), 0 when none is given ('*res' is NULL)
 * and -1 with '*res' set to NULL on error, namely when both are given or
 * when issuing the deprecation warning for 'string' fails.
 */
int
4+
_Py_hashlib_data_argument(PyObject **res, PyObject *data, PyObject *string)
5+
{
6+
if (data != NULL && string == NULL) {
7+
// only 'data' was given: called as H(data) or H(data=...)
8+
*res = data;
9+
return 1;
10+
}
11+
else if (data == NULL && string != NULL) {
12+
// only 'string' was given: called as H(string=...), which is deprecated
13+
if (PyErr_WarnEx(PyExc_DeprecationWarning,
14+
"the 'string' keyword parameter is deprecated since "
15+
"Python 3.15 and slated for removal in Python 3.19; "
16+
"use the 'data' keyword parameter or pass the data "
17+
"to hash as a positional argument instead", 1) < 0)
18+
{
// PyErr_WarnEx() reported an error, propagate it to the caller
19+
*res = NULL;
20+
return -1;
21+
}
22+
*res = string;
23+
return 1;
24+
}
25+
else if (data == NULL && string == NULL) {
26+
// fast path when no data is given (this is not an error)
27+
assert(!PyErr_Occurred());
28+
*res = NULL;
29+
return 0;
30+
}
31+
else {
32+
// both 'data' and 'string' were given, e.g., H(data=..., string=...)
33+
*res = NULL;
34+
PyErr_SetString(PyExc_TypeError,
35+
"'data' and 'string' are mutually exclusive "
36+
"and support for 'string' keyword parameter "
37+
"is slated for removal in a future version.");
38+
return -1;
39+
}
40+
}
41+
42+
/*
 * Fill 'view' with a PyBUF_SIMPLE buffer view of 'obj'.
 *
 * Return 0 on success; the view is then left acquired and releasing it is
 * up to the caller. Return -1 with an exception set on error, in which case
 * no view is left acquired by this function.
 */
int
43+
_Py_hashlib_get_buffer_view(PyObject *obj, Py_buffer *view)
44+
{
45+
// str objects are rejected up front with a dedicated error message
if (PyUnicode_Check(obj)) {
46+
PyErr_SetString(PyExc_TypeError,
47+
"Strings must be encoded before hashing");
48+
return -1;
49+
}
50+
if (!PyObject_CheckBuffer(obj)) {
51+
PyErr_SetString(PyExc_TypeError,
52+
"object supporting the buffer API required");
53+
return -1;
54+
}
55+
// no exception is set here: that is left to PyObject_GetBuffer() itself
if (PyObject_GetBuffer(obj, view, PyBUF_SIMPLE) == -1) {
56+
return -1;
57+
}
58+
// the view is acquired from here on, so it is released before failing
if (view->ndim > 1) {
59+
PyErr_SetString(PyExc_BufferError,
60+
"Buffer must be single dimension");
61+
PyBuffer_Release(view);
62+
return -1;
63+
}
64+
return 0;
65+
}

Modules/_hashlib/hashlib_buffer.h

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
#ifndef _HASHLIB_HASHLIB_BUFFER_H
2+
#define _HASHLIB_HASHLIB_BUFFER_H
3+
4+
#include "Python.h"
5+
6+
/*
7+
* Allow the 'data' or 'string' keyword to be used in hashlib.new()
8+
* and in the named constructors of the other hash functions.
9+
*
10+
* - If 'data' and 'string' are both non-NULL, set an exception and return -1.
11+
* - If 'data' and 'string' are both NULL, set '*res' to NULL and return 0.
12+
* - Otherwise, set '*res' to 'data' or 'string' and return 1. A deprecation
13+
* warning is emitted when 'string' is specified; if that fails, return -1.
14+
*
15+
* The symbol is exported for '_hashlib' and HACL*-based extension modules.
16+
*/
17+
PyAPI_FUNC(int)
18+
_Py_hashlib_data_argument(PyObject **res, PyObject *data, PyObject *string);
19+
20+
/*
21+
* Obtain a buffer view from a buffer-like object 'obj'.
22+
*
23+
* On success, store the result in 'view' and return 0.
24+
* On error, set an exception and return -1.
25+
*
26+
* The symbol is exported for '_hashlib' and HACL*-based extension modules.
27+
*/
28+
PyAPI_FUNC(int)
29+
_Py_hashlib_get_buffer_view(PyObject *obj, Py_buffer *view);
30+
31+
/*
32+
* Call _Py_hashlib_get_buffer_view(OBJ, VIEW) and check if it succeeded.
33+
*
34+
* On error, an exception is set and the ERRACTION statements are executed.
* ERRACTION is pasted verbatim into the error branch; the macro itself does
* not leave the enclosing function, so ERRACTION is expected to do so when
* needed (for instance with 'return' or 'goto').
35+
*/
36+
#define GET_BUFFER_VIEW_OR_ERROR(OBJ, VIEW, ERRACTION) \
37+
do { \
38+
if (_Py_hashlib_get_buffer_view(OBJ, VIEW) < 0) { \
39+
assert(PyErr_Occurred()); \
40+
ERRACTION; \
41+
} \
42+
} while (0)
43+
44+
/*
 * Specialization of GET_BUFFER_VIEW_OR_ERROR() returning NULL on error.
 *
 * The expansion contains a 'return NULL' statement, so this macro is only
 * usable inside functions whose return type accepts NULL.
 */
45+
#define GET_BUFFER_VIEW_OR_ERROUT(OBJ, VIEW) \
46+
GET_BUFFER_VIEW_OR_ERROR(OBJ, VIEW, return NULL)
47+
48+
#endif // !_HASHLIB_HASHLIB_BUFFER_H

Modules/_hashlib/hashlib_fetch.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
/*
2+
* Utilities used when fetching a message digest from a digest-like identifier.
3+
*/
4+
5+
#ifndef _HASHLIB_HASHLIB_FETCH_H
6+
#define _HASHLIB_HASHLIB_FETCH_H
7+
8+
#include "Python.h"
9+
10+
/*
11+
* Internal error messages used for reporting an unsupported hash algorithm.
12+
* The algorithm can be given by its name, a callable or a PEP-247 module.
13+
* The same message is raised by Lib/hashlib.py::__get_builtin_constructor()
14+
* and _hmacmodule.c::find_hash_info().
15+
*/
16+
#define _Py_HASHLIB_UNSUPPORTED_ALGORITHM "unsupported hash algorithm %S"
17+
#define _Py_HASHLIB_UNSUPPORTED_STR_ALGORITHM "unsupported hash algorithm %s"
18+
19+
#endif // !_HASHLIB_HASHLIB_FETCH_H

Modules/_hashlib/hashlib_mutex.h

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
#ifndef _HASHLIB_HASHLIB_MUTEX_H
2+
#define _HASHLIB_HASHLIB_MUTEX_H
3+
4+
#include "Python.h"
5+
#include "pycore_lock.h" // PyMutex
6+
7+
/*
8+
* Message length above which the GIL is to be released
9+
* when performing hashing operations.
10+
*/
11+
#define HASHLIB_GIL_MINSIZE 2048
12+
13+
/*
14+
* Helper code to synchronize access to the hash object when the GIL is
15+
* released around a CPU consuming hashlib operation.
16+
*
17+
* Code accessing a mutable part of the hash object must be enclosed in
18+
* a HASHLIB_{ACQUIRE,RELEASE}_LOCK block or explicitly acquire and release
19+
* the mutex inside a Py_BEGIN_ALLOW_THREADS -- Py_END_ALLOW_THREADS block if
20+
* they wish to release the GIL for an operation.
21+
*/
22+
23+
#define HASHLIB_OBJECT_HEAD \
24+
PyObject_HEAD \
25+
/* Guard against race conditions during incremental update(). */ \
26+
PyMutex mutex;
27+
28+
/* Reset the 'mutex' member of 'OBJ' to a zero-initialized PyMutex. */
#define HASHLIB_INIT_MUTEX(OBJ) \
29+
do { \
30+
(OBJ)->mutex = (PyMutex){0}; \
31+
} while (0)
32+
33+
/*
 * Lock and unlock the 'mutex' member of 'OBJ', i.e., the member declared
 * by HASHLIB_OBJECT_HEAD. Both expand to a single PyMutex call expression.
 */
#define HASHLIB_ACQUIRE_LOCK(OBJ) PyMutex_Lock(&(OBJ)->mutex)
34+
#define HASHLIB_RELEASE_LOCK(OBJ) PyMutex_Unlock(&(OBJ)->mutex)
35+
36+
// Macros for executing code while conditionally holding the GIL.
37+
//
38+
// These only drop the GIL if the lock acquisition itself is likely to
39+
// block. Thus the non-blocking acquire gating the GIL release for a
40+
// blocking lock acquisition. The intent of these macros is to surround
41+
// the assumed always "fast" operations that you aren't releasing the
42+
// GIL around.
43+
44+
/*
45+
* Execute a suite of C statements 'STATEMENTS'.
46+
*
47+
* The GIL is released around 'STATEMENTS' only if 'SIZE' is strictly greater
* than HASHLIB_GIL_MINSIZE; otherwise 'STATEMENTS' runs with the GIL held.
*
* 'STATEMENTS' is pasted into both branches of the expansion, but only one
* of the two copies is executed.
48+
*/
49+
#define HASHLIB_EXTERNAL_INSTRUCTIONS_UNLOCKED(SIZE, STATEMENTS) \
50+
do { \
51+
if ((SIZE) > HASHLIB_GIL_MINSIZE) { \
52+
Py_BEGIN_ALLOW_THREADS \
53+
STATEMENTS; \
54+
Py_END_ALLOW_THREADS \
55+
} \
56+
else { \
57+
STATEMENTS; \
58+
} \
59+
} while (0)
60+
61+
/*
62+
* Lock 'OBJ' and execute a suite of C statements 'STATEMENTS'.
63+
*
64+
* The GIL is released around the locked section only if 'SIZE' is strictly
* greater than HASHLIB_GIL_MINSIZE; otherwise the GIL stays held.
*
* In both cases 'STATEMENTS' runs between HASHLIB_ACQUIRE_LOCK(OBJ) and
* HASHLIB_RELEASE_LOCK(OBJ). When the GIL is released, the lock is taken
* after releasing the GIL and dropped before the GIL is taken back.
65+
*/
66+
#define HASHLIB_EXTERNAL_INSTRUCTIONS_LOCKED(OBJ, SIZE, STATEMENTS) \
67+
do { \
68+
if ((SIZE) > HASHLIB_GIL_MINSIZE) { \
69+
Py_BEGIN_ALLOW_THREADS \
70+
HASHLIB_ACQUIRE_LOCK(OBJ); \
71+
STATEMENTS; \
72+
HASHLIB_RELEASE_LOCK(OBJ); \
73+
Py_END_ALLOW_THREADS \
74+
} \
75+
else { \
76+
HASHLIB_ACQUIRE_LOCK(OBJ); \
77+
STATEMENTS; \
78+
HASHLIB_RELEASE_LOCK(OBJ); \
79+
} \
80+
} while (0)
81+
82+
#endif // !_HASHLIB_HASHLIB_MUTEX_H

Modules/_hashopenssl.c

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,17 @@
2424

2525
#include "Python.h"
2626
#include "pycore_hashtable.h"
27-
#include "pycore_strhex.h" // _Py_strhex()
28-
#include "pycore_pyatomic_ft_wrappers.h" // FT_ATOMIC_LOAD_PTR_RELAXED
29-
#include "hashlib.h"
27+
#include "pycore_strhex.h" // _Py_strhex()
28+
#include "pycore_pyatomic_ft_wrappers.h" // FT_ATOMIC_LOAD_PTR_RELAXED
29+
30+
#include "_hashlib/hashlib_buffer.h"
31+
#include "_hashlib/hashlib_fetch.h"
32+
#include "_hashlib/hashlib_mutex.h"
3033

3134
/* EVP is the preferred interface to hashing in OpenSSL */
3235
#include <openssl/evp.h>
3336
#include <openssl/hmac.h>
34-
#include <openssl/crypto.h> // FIPS_mode()
37+
#include <openssl/crypto.h> // FIPS_mode()
3538
/* We use the object interface to discover what hashes OpenSSL supports. */
3639
#include <openssl/objects.h>
3740
#include <openssl/err.h>
@@ -532,7 +535,7 @@ raise_unsupported_algorithm_error(_hashlibstate *state, PyObject *digestmod)
532535
{
533536
raise_unsupported_algorithm_impl(
534537
state->unsupported_digestmod_error,
535-
HASHLIB_UNSUPPORTED_ALGORITHM,
538+
_Py_HASHLIB_UNSUPPORTED_ALGORITHM,
536539
digestmod
537540
);
538541
}
@@ -542,7 +545,7 @@ raise_unsupported_str_algorithm_error(_hashlibstate *state, const char *name)
542545
{
543546
raise_unsupported_algorithm_impl(
544547
state->unsupported_digestmod_error,
545-
HASHLIB_UNSUPPORTED_STR_ALGORITHM,
548+
_Py_HASHLIB_UNSUPPORTED_STR_ALGORITHM,
546549
name
547550
);
548551
}

Modules/blake2module.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,12 @@
1515
#endif
1616

1717
#include "Python.h"
18-
#include "hashlib.h"
19-
#include "pycore_strhex.h" // _Py_strhex()
20-
#include "pycore_typeobject.h"
2118
#include "pycore_moduleobject.h"
19+
#include "pycore_strhex.h" // _Py_strhex()
20+
#include "pycore_typeobject.h"
21+
22+
#include "_hashlib/hashlib_buffer.h"
23+
#include "_hashlib/hashlib_mutex.h"
2224

2325
// QUICK CPU AUTODETECTION
2426
//

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy