From 474c166cc1af650ade88c7393328cb294990f2dd Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 14 May 2026 19:39:37 +0200 Subject: [PATCH 1/7] gh-149807: Fix hash(frozendict): compute (key, value) pair hash --- Lib/test/test_dict.py | 11 +++++ ...-05-14-19-41-03.gh-issue-149807.IwGaCo.rst | 2 + Objects/dictobject.c | 46 +++++++++++++++---- 3 files changed, 50 insertions(+), 9 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2026-05-14-19-41-03.gh-issue-149807.IwGaCo.rst diff --git a/Lib/test/test_dict.py b/Lib/test/test_dict.py index 4efb066d4fd01c..994310cde7d892 100644 --- a/Lib/test/test_dict.py +++ b/Lib/test/test_dict.py @@ -1903,6 +1903,17 @@ def test_hash(self): self.assertEqual(hash(frozendict(x=1, y=2)), hash(frozendict(y=2, x=1))) + # Check that hash() computes the hash of (key, value) pairs + cases = [ + frozendict(a=False, b=True, c=True), + frozendict(a=True, b=False, c=True), + frozendict(a=True, b=True, c=False), + frozendict({False: "a", "b": True, "c": True}), + frozendict({"a": "b", False: True, True: "c"}), + ] + hashes = {hash(fd) for fd in cases} + self.assertEqual(len(hashes), 5) + fd = frozendict(x=[1], y=[2]) with self.assertRaisesRegex(TypeError, "unhashable type: 'list'"): hash(fd) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-05-14-19-41-03.gh-issue-149807.IwGaCo.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-05-14-19-41-03.gh-issue-149807.IwGaCo.rst new file mode 100644 index 00000000000000..a94c737e73619d --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-05-14-19-41-03.gh-issue-149807.IwGaCo.rst @@ -0,0 +1,2 @@ +Fix ``hash(frozendict)``: compute the hash of each ``(key, value)`` pair +correctly. Patch by Victor Stinner. 
diff --git a/Objects/dictobject.c b/Objects/dictobject.c index b33a273dac3b95..94eef2ab3e13a3 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -8228,6 +8228,40 @@ _shuffle_bits(Py_uhash_t h) return ((h ^ 89869747UL) ^ (h << 16)) * 3644798167UL; } +// Compute hash((key, value)). +// Code copied from tuple_hash(). +static Py_hash_t +frozendict_pair_hash(PyObject *key, PyObject *value) +{ + Py_ssize_t len = 2; + Py_uhash_t acc = _PyTuple_HASH_XXPRIME_5; + + Py_uhash_t lane = PyObject_Hash(key); + if (lane == (Py_uhash_t)-1) { + return -1; + } + acc += lane * _PyTuple_HASH_XXPRIME_2; + acc = _PyTuple_HASH_XXROTATE(acc); + acc *= _PyTuple_HASH_XXPRIME_1; + + lane = PyObject_Hash(value); + if (lane == (Py_uhash_t)-1) { + return -1; + } + acc += lane * _PyTuple_HASH_XXPRIME_2; + acc = _PyTuple_HASH_XXROTATE(acc); + acc *= _PyTuple_HASH_XXPRIME_1; + + /* Add input length, mangled to keep the historical value of hash(()). */ + acc += len ^ (_PyTuple_HASH_XXPRIME_5 ^ 3527539UL); + + if (acc == (Py_uhash_t)-1) { + acc = 1546275796; + } + return acc; +} + + // Code copied from frozenset_hash() static Py_hash_t frozendict_hash(PyObject *op) @@ -8244,17 +8278,11 @@ frozendict_hash(PyObject *op) PyObject *key, *value; // borrowed refs Py_ssize_t pos = 0; while (PyDict_Next(op, &pos, &key, &value)) { - Py_hash_t key_hash = PyObject_Hash(key); - if (key_hash == -1) { - return -1; - } - hash ^= _shuffle_bits(key_hash); - - Py_hash_t value_hash = PyObject_Hash(value); - if (value_hash == -1) { + Py_hash_t pair_hash = frozendict_pair_hash(key, value); + if (pair_hash == -1) { return -1; } - hash ^= _shuffle_bits(value_hash); + hash ^= _shuffle_bits(pair_hash); } /* Factor in the number of active entries */ From 75abaed6767cb90e195b09eb472c650f2ef8a9a7 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 14 May 2026 19:49:22 +0200 Subject: [PATCH 2/7] Cleanup the test --- Lib/test/test_dict.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/Lib/test/test_dict.py b/Lib/test/test_dict.py index 994310cde7d892..62a3d3bf938e73 100644 --- a/Lib/test/test_dict.py +++ b/Lib/test/test_dict.py @@ -1912,7 +1912,7 @@ def test_hash(self): frozendict({"a": "b", False: True, True: "c"}), ] hashes = {hash(fd) for fd in cases} - self.assertEqual(len(hashes), 5) + self.assertEqual(len(hashes), len(cases)) fd = frozendict(x=[1], y=[2]) with self.assertRaisesRegex(TypeError, "unhashable type: 'list'"): From 92b6eb4a004a1e96b9f0b2cdc2c9d0d5b0ebcaf8 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 14 May 2026 19:50:51 +0200 Subject: [PATCH 3/7] Add comment --- Objects/tupleobject.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c index 94230002427546..6120e70f3eeea4 100644 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -363,6 +363,9 @@ tuple_repr(PyObject *self) https://github.com/Cyan4973/xxHash/blob/master/doc/xxhash_spec.md The constants for the hash function are defined in pycore_tuple.h. + + If you update this code, also update frozendict_pair_hash(), which is a + copy of this code. 
*/ static Py_hash_t From abaa568e52fb0ecaba1003a0599e6c24acb85ccf Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 18 May 2026 14:38:14 +0200 Subject: [PATCH 4/7] Add const to 'Py_ssize_t len' variable --- Objects/dictobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 94eef2ab3e13a3..ee265388e706fe 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -8233,7 +8233,7 @@ _shuffle_bits(Py_uhash_t h) static Py_hash_t frozendict_pair_hash(PyObject *key, PyObject *value) { - Py_ssize_t len = 2; + const Py_ssize_t len = 2; Py_uhash_t acc = _PyTuple_HASH_XXPRIME_5; Py_uhash_t lane = PyObject_Hash(key); From a55a54e769d0421364d4df7e81b942d7fe376c6d Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 18 May 2026 14:42:10 +0200 Subject: [PATCH 5/7] Add test_hash_cpython() --- Lib/test/test_dict.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/Lib/test/test_dict.py b/Lib/test/test_dict.py index 62a3d3bf938e73..f26586809238f0 100644 --- a/Lib/test/test_dict.py +++ b/Lib/test/test_dict.py @@ -1918,6 +1918,20 @@ def test_hash(self): with self.assertRaisesRegex(TypeError, "unhashable type: 'list'"): hash(fd) + @support.cpython_only + def test_hash_cpython(self): + # Check that hash(frozendict) implementation is: + # hash(frozenset(fd.items())) + for fd in ( + frozendict(), + frozendict(x=1, y=2), + frozendict(y=2, x=1), + frozendict(a=False, b=True, c=True), + frozendict.fromkeys('abc'), + ): + with self.subTest(fd=fd): + self.assertEqual(hash(fd), hash(frozenset(fd.items()))) + def test_fromkeys(self): self.assertEqual(frozendict.fromkeys('abc'), frozendict(a=None, b=None, c=None)) From b4b9e33fa6ebb931ceca9982da2efafe86f86dec Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 18 May 2026 14:44:19 +0200 Subject: [PATCH 6/7] Use _PyDict_Next() --- Objects/dictobject.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git 
a/Objects/dictobject.c b/Objects/dictobject.c index ee265388e706fe..233b8e9dacc319 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -8231,15 +8231,14 @@ _shuffle_bits(Py_uhash_t h) // Compute hash((key, value)). // Code copied from tuple_hash(). static Py_hash_t -frozendict_pair_hash(PyObject *key, PyObject *value) +frozendict_pair_hash(Py_hash_t key_hash, PyObject *value) { + assert(key_hash != (Py_uhash_t)-1); + const Py_ssize_t len = 2; Py_uhash_t acc = _PyTuple_HASH_XXPRIME_5; - Py_uhash_t lane = PyObject_Hash(key); - if (lane == (Py_uhash_t)-1) { - return -1; - } + Py_uhash_t lane = key_hash; acc += lane * _PyTuple_HASH_XXPRIME_2; acc = _PyTuple_HASH_XXROTATE(acc); acc *= _PyTuple_HASH_XXPRIME_1; @@ -8275,10 +8274,11 @@ frozendict_hash(PyObject *op) PyDictObject *mp = _PyAnyDict_CAST(op); Py_uhash_t hash = 0; - PyObject *key, *value; // borrowed refs + PyObject *value; // borrowed ref Py_ssize_t pos = 0; - while (PyDict_Next(op, &pos, &key, &value)) { - Py_hash_t pair_hash = frozendict_pair_hash(key, value); + Py_hash_t key_hash; + while (_PyDict_Next(op, &pos, NULL, &value, &key_hash)) { + Py_hash_t pair_hash = frozendict_pair_hash(key_hash, value); if (pair_hash == -1) { return -1; } From 855727712953d3a93b2d3022a06378592defd9a3 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 18 May 2026 18:11:04 +0200 Subject: [PATCH 7/7] Fix compiler warning --- Objects/dictobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 233b8e9dacc319..cbccc633431997 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -8233,7 +8233,7 @@ _shuffle_bits(Py_uhash_t h) static Py_hash_t frozendict_pair_hash(Py_hash_t key_hash, PyObject *value) { - assert(key_hash != (Py_uhash_t)-1); + assert(key_hash != -1); const Py_ssize_t len = 2; Py_uhash_t acc = _PyTuple_HASH_XXPRIME_5;