From 2bee4594c44047b907d7641a7b11674ad1fd2588 Mon Sep 17 00:00:00 2001 From: Isidro Arias Date: Sat, 19 Apr 2025 12:55:00 +0200 Subject: [PATCH 1/6] Mutable _Item --- data_structures/hashing/hash_map.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/data_structures/hashing/hash_map.py b/data_structures/hashing/hash_map.py index 9213d6930f67..7c4f372c67ac 100644 --- a/data_structures/hashing/hash_map.py +++ b/data_structures/hashing/hash_map.py @@ -16,7 +16,7 @@ VAL = TypeVar("VAL") -@dataclass(frozen=True, slots=True) +@dataclass(slots=True) class _Item(Generic[KEY, VAL]): key: KEY val: VAL @@ -81,7 +81,7 @@ def _try_set(self, ind: int, key: KEY, val: VAL) -> bool: self._len += 1 return True elif stored.key == key: - self._buckets[ind] = _Item(key, val) + stored.val = val return True else: return False From 454eded043d5a0a2024a9509b622d567ec9915c9 Mon Sep 17 00:00:00 2001 From: Isidro Arias Date: Sat, 19 Apr 2025 12:59:28 +0200 Subject: [PATCH 2/6] document falsy item --- data_structures/hashing/hash_map.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/data_structures/hashing/hash_map.py b/data_structures/hashing/hash_map.py index 7c4f372c67ac..306bff93dc4b 100644 --- a/data_structures/hashing/hash_map.py +++ b/data_structures/hashing/hash_map.py @@ -72,11 +72,11 @@ def _try_set(self, ind: int, key: KEY, val: VAL) -> bool: If bucket is empty or key is the same, does insert and return True. - If bucket has another key or deleted placeholder, - that means that we need to check next bucket. + If bucket has another key that means that we need to check next bucket. """ stored = self._buckets[ind] if not stored: + # A falsy item means that is None (bucket never used) or _deleted. 
self._buckets[ind] = _Item(key, val) self._len += 1 return True From 2c99889fc597dea69e996ddbf8689ce27d27d9f8 Mon Sep 17 00:00:00 2001 From: Isidro Arias Date: Sat, 19 Apr 2025 13:02:36 +0200 Subject: [PATCH 3/6] resize_down: expected test result --- data_structures/hashing/hash_map.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/data_structures/hashing/hash_map.py b/data_structures/hashing/hash_map.py index 306bff93dc4b..9a560f909465 100644 --- a/data_structures/hashing/hash_map.py +++ b/data_structures/hashing/hash_map.py @@ -228,6 +228,28 @@ def __delitem__(self, key: KEY) -> None: Traceback (most recent call last): ... KeyError: 4 + + # Test resize down when sparse + ## Setup: resize up + >>> hm = HashMap(100, capacity_factor=0.75) + >>> len(hm._buckets) + 100 + >>> for i in range(75): + ... hm[i] = i + >>> len(hm._buckets) + 100 + >>> hm[75] = 75 + >>> len(hm._buckets) + 200 + + ## Resize down + >>> for i in range(38, 76): + ... del hm[i] + >>> len(hm._buckets) + 200 + >>> del hm[37] + >>> len(hm._buckets) + 100 """ for ind in self._iterate_buckets(key): item = self._buckets[ind] From 635430404179bc08e73ff9c6bb0b27de6f112de7 Mon Sep 17 00:00:00 2001 From: Isidro Arias Date: Sat, 19 Apr 2025 13:03:23 +0200 Subject: [PATCH 4/6] resize_down: actual result This is a problem since it causes rapid cycling between resize-up and resize-down: right after growing from 100 to 200 buckets, deleting just two items shrinks the table back to 100, so re-inserting them grows it again. --- data_structures/hashing/hash_map.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/data_structures/hashing/hash_map.py b/data_structures/hashing/hash_map.py index 9a560f909465..5d89f807e243 100644 --- a/data_structures/hashing/hash_map.py +++ b/data_structures/hashing/hash_map.py @@ -243,11 +243,8 @@ def __delitem__(self, key: KEY) -> None: 200 ## Resize down - >>> for i in range(38, 76): - ...
del hm[i] - >>> len(hm._buckets) - 200 - >>> del hm[37] + >>> del hm[75] + >>> del hm[74] >>> len(hm._buckets) 100 """ From 875caab8cd4a8fe2c168ab181c79aab254a5837b Mon Sep 17 00:00:00 2001 From: Isidro Date: Thu, 24 Apr 2025 10:00:47 +0200 Subject: [PATCH 5/6] improve comment about falsy item Co-authored-by: Andrey --- data_structures/hashing/hash_map.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data_structures/hashing/hash_map.py b/data_structures/hashing/hash_map.py index 5d89f807e243..de433102b0e5 100644 --- a/data_structures/hashing/hash_map.py +++ b/data_structures/hashing/hash_map.py @@ -76,7 +76,7 @@ def _try_set(self, ind: int, key: KEY, val: VAL) -> bool: """ stored = self._buckets[ind] if not stored: - # A falsy item means that is None (bucket never used) or _deleted. + # A falsy item means that bucket was never used (None) or was deleted (_deleted). self._buckets[ind] = _Item(key, val) self._len += 1 return True From cff17ecd764beaedc5f39a9bd821addc0c236592 Mon Sep 17 00:00:00 2001 From: Isidro Arias Date: Thu, 24 Apr 2025 10:10:29 +0200 Subject: [PATCH 6/6] fix long line --- data_structures/hashing/hash_map.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/data_structures/hashing/hash_map.py b/data_structures/hashing/hash_map.py index de433102b0e5..1a0931c7f271 100644 --- a/data_structures/hashing/hash_map.py +++ b/data_structures/hashing/hash_map.py @@ -76,7 +76,8 @@ def _try_set(self, ind: int, key: KEY, val: VAL) -> bool: """ stored = self._buckets[ind] if not stored: - # A falsy item means that bucket was never used (None) or was deleted (_deleted). + # A falsy item means that bucket was never used (None) + # or was deleted (_deleted). self._buckets[ind] = _Item(key, val) self._len += 1 return True