diff --git a/extmod/modselect.c b/extmod/modselect.c index 229f8f737b517..28b5b0bf6ccd9 100644 --- a/extmod/modselect.c +++ b/extmod/modselect.c @@ -205,7 +205,7 @@ static struct pollfd *poll_set_add_fd(poll_set_t *poll_set, int fd) { } static inline bool poll_set_all_are_fds(poll_set_t *poll_set) { - return poll_set->map.used == poll_set->used; + return mp_map_len(&poll_set->map) == poll_set->used; } #else diff --git a/ports/cc3200/mods/pybpin.c b/ports/cc3200/mods/pybpin.c index ea40aa9df8d20..fde230d7a6d2a 100644 --- a/ports/cc3200/mods/pybpin.c +++ b/ports/cc3200/mods/pybpin.c @@ -118,7 +118,7 @@ void pin_init0(void) { // assign all pins to the GPIO module so that peripherals can be connected to any // pins without conflicts after a soft reset const mp_map_t *named_map = &pin_board_pins_locals_dict.map; - for (uint i = 0; i < named_map->used - 1; i++) { + for (uint i = 0; i < mp_map_len(named_map) - 1; i++) { pin_obj_t * pin = (pin_obj_t *)named_map->table[i].value; pin_deassign (pin); } @@ -216,7 +216,7 @@ static pin_obj_t *pin_find_named_pin(const mp_obj_dict_t *named_pins, mp_obj_t n static pin_obj_t *pin_find_pin_by_port_bit (const mp_obj_dict_t *named_pins, uint port, uint bit) { const mp_map_t *named_map = &named_pins->map; - for (uint i = 0; i < named_map->used; i++) { + for (uint i = 0; i < mp_map_len(named_map); i++) { if ((((pin_obj_t *)named_map->table[i].value)->port == port) && (((pin_obj_t *)named_map->table[i].value)->bit == bit)) { return named_map->table[i].value; @@ -236,7 +236,7 @@ static int8_t pin_obj_find_af (const pin_obj_t* pin, uint8_t fn, uint8_t unit, u static void pin_free_af_from_pins (uint8_t fn, uint8_t unit, uint8_t type) { const mp_map_t *named_map = &pin_board_pins_locals_dict.map; - for (uint i = 0; i < named_map->used - 1; i++) { + for (uint i = 0; i < mp_map_len(named_map) - 1; i++) { pin_obj_t * pin = (pin_obj_t *)named_map->table[i].value; // af is different than GPIO if (pin->af > PIN_MODE_0) { diff --git a/ports/cc3200/mods/pybsleep.c b/ports/cc3200/mods/pybsleep.c index 291860de8d676..6ec2e306f08e8 100644 --- a/ports/cc3200/mods/pybsleep.c +++ b/ports/cc3200/mods/pybsleep.c @@ -528,7 +528,7 @@ static void pyb_sleep_obj_wakeup (void) { static void pyb_sleep_iopark (bool hibernate) { const mp_map_t *named_map = &pin_board_pins_locals_dict.map; - for (uint i = 0; i < named_map->used; i++) { + for (uint i = 0; i < mp_map_len(named_map); i++) { pin_obj_t * pin = (pin_obj_t *)named_map->table[i].value; switch (pin->pin_num) { #ifdef DEBUG diff --git a/ports/nrf/modules/machine/pin.c b/ports/nrf/modules/machine/pin.c index f46394d769c95..eade9343af31f 100644 --- a/ports/nrf/modules/machine/pin.c +++ b/ports/nrf/modules/machine/pin.c @@ -441,7 +441,7 @@ static mp_obj_t pin_names(mp_obj_t self_in) { mp_map_t *map = mp_obj_dict_get_map((mp_obj_t)&pin_board_pins_locals_dict); mp_map_elem_t *elem = map->table; - for (mp_uint_t i = 0; i < map->used; i++, elem++) { + for (mp_uint_t i = 0; i < mp_map_len(map); i++, elem++) { if (elem->value == self) { mp_obj_list_append(result, elem->key); } diff --git a/ports/nrf/modules/ubluepy/ubluepy_scan_entry.c b/ports/nrf/modules/ubluepy/ubluepy_scan_entry.c index cf329ddd9d780..2d068beedb31e 100644 --- a/ports/nrf/modules/ubluepy/ubluepy_scan_entry.c +++ b/ports/nrf/modules/ubluepy/ubluepy_scan_entry.c @@ -94,7 +94,7 @@ static mp_obj_t scan_entry_get_scan_data(mp_obj_t self_in) { mp_map_t *constant_map = mp_obj_dict_get_map(MP_OBJ_TYPE_GET_SLOT(&ubluepy_constants_ad_types_type, locals_dict)); mp_map_elem_t *ad_types_table = MP_OBJ_TO_PTR(constant_map->table); - uint16_t num_of_elements = constant_map->used; + uint16_t num_of_elements = mp_map_len(constant_map); for (uint16_t i = 0; i < num_of_elements; i++) { mp_map_elem_t element = (mp_map_elem_t)*ad_types_table; diff --git a/ports/stm32/pin.c b/ports/stm32/pin.c index 515437ac8610e..a3925e8cff0b0 100644 --- a/ports/stm32/pin.c +++ b/ports/stm32/pin.c @@ -470,7 +470,7 @@ static mp_obj_t pin_names(mp_obj_t self_in) { const mp_map_t *map = &machine_pin_board_pins_locals_dict.map; mp_map_elem_t *elem = map->table; - for (mp_uint_t i = 0; i < map->used; i++, elem++) { + for (mp_uint_t i = 0; i < mp_map_len(map); i++, elem++) { if (elem->value == self_in) { mp_obj_list_append(result, elem->key); } diff --git a/ports/unix/variants/coverage/mpconfigvariant.h b/ports/unix/variants/coverage/mpconfigvariant.h index 2f5d9683b3f4b..5541dc7f87332 100644 --- a/ports/unix/variants/coverage/mpconfigvariant.h +++ b/ports/unix/variants/coverage/mpconfigvariant.h @@ -46,6 +46,7 @@ #define MICROPY_VFS_ROM_IOCTL (1) #define MICROPY_PY_CRYPTOLIB_CTR (1) #define MICROPY_SCHEDULER_STATIC_NODES (1) +#define MICROPY_PY_MAP_LARGE (1) // Enable os.uname for attrtuple coverage test #define MICROPY_PY_OS_UNAME (1) diff --git a/py/map.c b/py/map.c index d40e3dc4d02d7..14275b6b06392 100644 --- a/py/map.c +++ b/py/map.c @@ -61,6 +61,55 @@ #define MAP_CACHE_SET(index, pos) #endif +#if MICROPY_PY_MAP_ORDERED +// Macros and functions to deal with key/value table and hash table. +// map->table points to the key/value table, then the hash table follows, +// which can be uint8_t, uint16_t, or uint32_t depending on allocation size. +#define MP_MAP_IS_UINT8(alloc) ((alloc) < 256) +#define MP_MAP_IS_UINT16(alloc) ((alloc) < 65536) +#if MICROPY_PY_MAP_LARGE +#define MP_MAP_INDEX_SIZE(alloc) (MP_MAP_IS_UINT8(alloc) ? 1 : (MP_MAP_IS_UINT16(alloc) ? 2 : 4)) +#else +#define MP_MAP_INDEX_SIZE(alloc) (MP_MAP_IS_UINT8(alloc) ? 1 : 2) +#endif +#define MP_MAP_TABLE_BYTE_SIZE(alloc) ((sizeof(mp_map_elem_t) + MP_MAP_INDEX_SIZE(alloc)) * (alloc)) +#define MP_MAP_GET_HASH_TABLE(map) ((void *)&(map)->table[(map)->alloc]) + +static inline size_t mp_map_hash_table_get(const mp_map_t *map, void *hash_table, size_t pos) { + if (MP_MAP_IS_UINT8(map->alloc)) { + return ((uint8_t *)hash_table)[pos]; + } + #if MICROPY_PY_MAP_LARGE + else if (MP_MAP_IS_UINT16(map->alloc)) { + return ((uint16_t *)hash_table)[pos]; + } else { + return ((uint32_t *)hash_table)[pos]; + } + #else + else { + return ((uint16_t *)hash_table)[pos]; + } + #endif +} + +static inline void mp_map_hash_table_put(const mp_map_t *map, void *hash_table, size_t pos, size_t value) { + if (MP_MAP_IS_UINT8(map->alloc)) { + ((uint8_t *)hash_table)[pos] = value; + } + #if MICROPY_PY_MAP_LARGE + else if (MP_MAP_IS_UINT16(map->alloc)) { + ((uint16_t *)hash_table)[pos] = value; + } else { + ((uint32_t *)hash_table)[pos] = value; + } + #else + else { + ((uint16_t *)hash_table)[pos] = value; + } + #endif +} +#endif // MICROPY_PY_MAP_ORDERED + // This table of sizes is used to control the growth of hash tables. // The first set of sizes are chosen so the allocation fits exactly in a // 4-word GC block, and it's not so important for these small values to be @@ -92,37 +141,114 @@ void mp_map_init(mp_map_t *map, size_t n) { map->table = NULL; } else { map->alloc = n; + #if MICROPY_PY_MAP_ORDERED + map->table = m_malloc0(MP_MAP_TABLE_BYTE_SIZE(map->alloc)); + #else map->table = m_new0(mp_map_elem_t, map->alloc); + #endif } map->used = 0; + #if MICROPY_PY_MAP_ORDERED + map->filled = 0; + #endif map->all_keys_are_qstrs = 1; map->is_fixed = 0; + #if !MICROPY_PY_MAP_ORDERED map->is_ordered = 0; + #endif } void mp_map_init_fixed_table(mp_map_t *map, size_t n, const mp_obj_t *table) { map->alloc = n; map->used = n; + #if MICROPY_PY_MAP_ORDERED + map->filled = n; + #endif map->all_keys_are_qstrs = 1; map->is_fixed = 1; + #if !MICROPY_PY_MAP_ORDERED map->is_ordered = 1; + #endif map->table = (mp_map_elem_t *)table; } +#if MICROPY_PY_MAP_ORDERED +static void mp_map_compact(mp_map_t *map); + +// Build hash index from scratch for map->table[0..used). +// Assumes the hash index region is already zeroed. +static void mp_map_rebuild_hash_index(mp_map_t *map) { + void *hash_table = MP_MAP_GET_HASH_TABLE(map); + map->all_keys_are_qstrs = 1; + for (size_t j = 0; j < map->used; j++) { + mp_obj_t key = map->table[j].key; + mp_uint_t h; + if (mp_obj_is_qstr(key)) { + h = qstr_hash(MP_OBJ_QSTR_VALUE(key)); + } else { + map->all_keys_are_qstrs = 0; + h = MP_OBJ_SMALL_INT_VALUE(mp_unary_op(MP_UNARY_OP_HASH, key)); + } + size_t pos = h % map->alloc; + while (mp_map_hash_table_get(map, hash_table, pos) != 0) { + pos = (pos + 1) % map->alloc; + } + mp_map_hash_table_put(map, hash_table, pos, j + 1); + } +} +#endif + +void mp_map_init_copy(mp_map_t *map, const mp_map_t *src) { + map->alloc = src->alloc; + map->used = src->used; + map->all_keys_are_qstrs = src->all_keys_are_qstrs; + map->is_fixed = 0; + #if MICROPY_PY_MAP_ORDERED + map->filled = src->filled; + if (src->alloc == 0) { + map->table = NULL; + } else { + // Allocate zeroed hash table layout (dense array + hash index). + map->table = m_malloc0(MP_MAP_TABLE_BYTE_SIZE(map->alloc)); + memcpy(map->table, src->table, src->used * sizeof(mp_map_elem_t)); + // Compact handles both tombstone removal and hash index rebuild. + mp_map_compact(map); + } + #else + map->is_ordered = src->is_ordered; + map->table = m_new(mp_map_elem_t, map->alloc); + memcpy(map->table, src->table, map->alloc * sizeof(mp_map_elem_t)); + #endif +} + // Differentiate from mp_map_clear() - semantics is different void mp_map_deinit(mp_map_t *map) { if (!map->is_fixed) { + #if MICROPY_PY_MAP_ORDERED + m_del(uint8_t, map->table, MP_MAP_TABLE_BYTE_SIZE(map->alloc)); + #else m_del(mp_map_elem_t, map->table, map->alloc); + #endif } map->used = map->alloc = 0; + #if MICROPY_PY_MAP_ORDERED + map->filled = 0; + #endif } void mp_map_clear(mp_map_t *map) { if (!map->is_fixed) { + #if MICROPY_PY_MAP_ORDERED + m_del(uint8_t, map->table, MP_MAP_TABLE_BYTE_SIZE(map->alloc)); + #else m_del(mp_map_elem_t, map->table, map->alloc); + #endif } map->alloc = 0; map->used = 0; + #if MICROPY_PY_MAP_ORDERED + map->filled = 0; + #endif map->all_keys_are_qstrs = 1; map->is_fixed = 0; map->table = NULL; @@ -131,21 +257,79 @@ void mp_map_clear(mp_map_t *map) { static void mp_map_rehash(mp_map_t *map) { size_t old_alloc = map->alloc; size_t new_alloc = get_hash_alloc_greater_or_equal_to(map->alloc + 1); + #if MICROPY_PY_MAP_ORDERED + // Cap alloc at the smaller of: hash index type limit and used/filled + // bitfield limit. Without MAP_LARGE, the hash index is uint16 (max 65535). + // The bitfield limit is (1 << (4 * sizeof(size_t) - 1)) - 1. + size_t max_alloc = ((size_t)1 << (4 * sizeof(size_t) - 1)) - 1; + #if !MICROPY_PY_MAP_LARGE + if (max_alloc > 65535) { + max_alloc = 65535; + } + #endif + if (new_alloc > max_alloc) { + new_alloc = max_alloc; + } + #endif + if (new_alloc <= old_alloc) { + // Cannot grow further (e.g. hash index size limit reached). + m_malloc_fail(new_alloc); + } DEBUG_printf("mp_map_rehash(%p): " UINT_FMT " -> " UINT_FMT "\n", map, old_alloc, new_alloc); mp_map_elem_t *old_table = map->table; + #if MICROPY_PY_MAP_ORDERED + mp_map_elem_t *new_table = m_malloc0(MP_MAP_TABLE_BYTE_SIZE(new_alloc)); + #else mp_map_elem_t *new_table = m_new0(mp_map_elem_t, new_alloc); + #endif // If we reach this point, table resizing succeeded, now we can edit the old map. map->alloc = new_alloc; + map->table = new_table; + #if MICROPY_PY_MAP_ORDERED + // Copy dense entries directly and rebuild hash index. + // Note: rehash is only called when filled == used (no tombstones). + memcpy(new_table, old_table, map->used * sizeof(mp_map_elem_t)); + mp_map_rebuild_hash_index(map); + m_del(uint8_t, old_table, MP_MAP_TABLE_BYTE_SIZE(old_alloc)); + #else map->used = 0; map->all_keys_are_qstrs = 1; - map->table = new_table; for (size_t i = 0; i < old_alloc; i++) { if (old_table[i].key != MP_OBJ_NULL && old_table[i].key != MP_OBJ_SENTINEL) { mp_map_lookup(map, old_table[i].key, MP_MAP_LOOKUP_ADD_IF_NOT_FOUND)->value = old_table[i].value; } } m_del(mp_map_elem_t, old_table, old_alloc); + #endif +} + +#if MICROPY_PY_MAP_ORDERED +// Compact map in-place by removing tombstones from the dense array +// and rebuilding the hash index. +static void mp_map_compact(mp_map_t *map) { + DEBUG_printf("mp_map_compact(%p): used=" UINT_FMT " filled=" UINT_FMT "\n", map, map->used, map->filled); + // Shift live entries down over tombstones. + size_t dest = 0; + for (size_t src = 0; src < map->used; src++) { + if (mp_map_slot_is_filled(map, src)) { + if (dest != src) { + map->table[dest] = map->table[src]; + } + dest++; + } + } + // Clear tail slots that are now unused. + if (dest < map->used) { + memset(&map->table[dest], 0, (map->used - dest) * sizeof(mp_map_elem_t)); + } + map->used = dest; + map->filled = dest; + + // Clear hash index and rebuild from the now-clean dense array. + memset(MP_MAP_GET_HASH_TABLE(map), 0, MP_MAP_INDEX_SIZE(map->alloc) * map->alloc); + mp_map_rebuild_hash_index(map); } +#endif // MICROPY_PY_MAP_ORDERED // MP_MAP_LOOKUP behaviour: // - returns NULL if not found, else the slot it was found in with key,value non-null @@ -188,18 +372,27 @@ mp_map_elem_t *MICROPY_WRAP_MP_MAP_LOOKUP(mp_map_lookup)(mp_map_t * map, mp_obj_ } } - // if the map is an ordered array then we must do a brute force linear search + #if MICROPY_PY_MAP_ORDERED + // When ordered maps are enabled, is_fixed maps use read-only linear scan. + if (map->is_fixed) { + for (mp_map_elem_t *elem = &map->table[0], *top = &map->table[map->used]; elem < top; elem++) { + if (elem->key == index || (!compare_only_ptrs && mp_obj_equal(elem->key, index))) { + MAP_CACHE_SET(index, elem - map->table); + return elem; + } + } + return NULL; + } + #else + // Original ordered array path for OrderedDict and fixed/ROM maps. if (map->is_ordered) { for (mp_map_elem_t *elem = &map->table[0], *top = &map->table[map->used]; elem < top; elem++) { if (elem->key == index || (!compare_only_ptrs && mp_obj_equal(elem->key, index))) { #if MICROPY_PY_COLLECTIONS_ORDEREDDICT if (MP_UNLIKELY(lookup_kind == MP_MAP_LOOKUP_REMOVE_IF_FOUND)) { - // remove the found element by moving the rest of the array down mp_obj_t value = elem->value; --map->used; memmove(elem, elem + 1, (top - elem - 1) * sizeof(*elem)); - // put the found element after the end so the caller can access it if needed - // note: caller must NULL the value so the GC can clean up (e.g. see dict_get_helper). elem = &map->table[map->used]; elem->key = MP_OBJ_NULL; elem->value = value; @@ -214,7 +407,6 @@ mp_map_elem_t *MICROPY_WRAP_MP_MAP_LOOKUP(mp_map_lookup)(mp_map_t * map, mp_obj_ return NULL; } if (map->used == map->alloc) { - // TODO: Alloc policy map->alloc += 4; map->table = m_renew(mp_map_elem_t, map->table, map->used, map->alloc); mp_seq_clear(map->table, map->used, map->alloc, sizeof(*map->table)); @@ -230,6 +422,7 @@ mp_map_elem_t *MICROPY_WRAP_MP_MAP_LOOKUP(mp_map_lookup)(mp_map_t * map, mp_obj_ return NULL; #endif } + #endif // map is a hash table (not an ordered array), so do a hash lookup @@ -249,6 +442,87 @@ mp_map_elem_t *MICROPY_WRAP_MP_MAP_LOOKUP(mp_map_lookup)(mp_map_t * map, mp_obj_ hash = MP_OBJ_SMALL_INT_VALUE(mp_unary_op(MP_UNARY_OP_HASH, index)); } + #if MICROPY_PY_MAP_ORDERED + // Ordered hash table: dense key/value array with separate hash index table. + void *hash_table = MP_MAP_GET_HASH_TABLE(map); + size_t pos = hash % map->alloc; + size_t start_pos = pos; + for (;;) { + size_t idx = mp_map_hash_table_get(map, hash_table, pos); + // idx should be 0 (empty) or in [1, used]; stale entries are a bug. + assert(idx == 0 || idx <= map->used); + mp_map_elem_t *slot = NULL; + if (idx != 0) { + slot = &map->table[idx - 1]; + } + if (slot == NULL) { + // found NULL slot, so index is not in table + if (lookup_kind == MP_MAP_LOOKUP_ADD_IF_NOT_FOUND) { + mp_map_hash_table_put(map, hash_table, pos, map->used + 1); + mp_map_elem_t *avail_slot = &map->table[map->used]; + map->used += 1; + map->filled += 1; + avail_slot->key = index; + avail_slot->value = MP_OBJ_NULL; + if (!mp_obj_is_qstr(index)) { + map->all_keys_are_qstrs = 0; + } + return avail_slot; + } else { + return NULL; + } + } else if (slot->key == MP_OBJ_SENTINEL) { + // found deleted slot, just skip it + } else if (slot->key == index || (!compare_only_ptrs && mp_obj_equal(slot->key, index))) { + // found index + // Note: CPython does not replace the index; try x={True:'true'};x[1]='one';x + if (lookup_kind == MP_MAP_LOOKUP_REMOVE_IF_FOUND) { + // Save value for caller before any table changes + mp_obj_t value = slot->value; + // delete element in this slot + map->filled -= 1; + slot->key = MP_OBJ_SENTINEL; + // Compact if tombstones exceed 50% of live entries to prevent unbounded growth. + // Skip if dict is now empty (no point compacting an empty dict). + if (map->filled > 0 && map->used - map->filled > map->filled / 2) { + mp_map_compact(map); + // After compact, original slot is invalid; return value in first empty slot + slot = &map->table[map->used]; + slot->key = MP_OBJ_NULL; + slot->value = value; + } + // Note: if no compact, slot->value is still valid from original location + return slot; + } + MAP_CACHE_SET(index, slot - map->table); + return slot; + } + + // not yet found, keep searching in this table + pos = (pos + 1) % map->alloc; + + if (pos == start_pos) { + // search got back to starting position, so index is not in table + if (lookup_kind == MP_MAP_LOOKUP_ADD_IF_NOT_FOUND) { + if (map->filled < map->used) { + // Tombstones exist, compact in-place to reclaim space. + // This does not allocate, so is safe when heap is locked. + mp_map_compact(map); + } else { + // Genuinely full, need bigger table. + mp_map_rehash(map); + } + // restart the search for the new element + hash_table = MP_MAP_GET_HASH_TABLE(map); + start_pos = pos = hash % map->alloc; + } else { + return NULL; + } + } + } + + #else + // Original flat open-addressing hash table. size_t pos = hash % map->alloc; size_t start_pos = pos; mp_map_elem_t *avail_slot = NULL; @@ -319,6 +593,7 @@ mp_map_elem_t *MICROPY_WRAP_MP_MAP_LOOKUP(mp_map_lookup)(mp_map_t * map, mp_obj_ } } } + #endif // MICROPY_PY_MAP_ORDERED } /******************************************************************************/ diff --git a/py/moderrno.c b/py/moderrno.c index 58a141c102025..4ee6616c5fcb5 100644 --- a/py/moderrno.c +++ b/py/moderrno.c @@ -73,8 +73,9 @@ static const mp_obj_dict_t errorcode_dict = { .map = { .all_keys_are_qstrs = 0, // keys are integers .is_fixed = 1, - .is_ordered = 1, + _MP_MAP_IS_ORDERED_INIT .used = MP_ARRAY_SIZE(errorcode_table), + _MP_MAP_FILLED_INIT(MP_ARRAY_SIZE(errorcode_table)) .alloc = MP_ARRAY_SIZE(errorcode_table), .table = (mp_map_elem_t *)(mp_rom_map_elem_t *)errorcode_table, }, diff --git a/py/modthread.c b/py/modthread.c index 7742ff68471f1..891f5356f2a18 100644 --- a/py/modthread.c +++ b/py/modthread.c @@ -229,8 +229,8 @@ static mp_obj_t mod_thread_start_new_thread(size_t n_args, const mp_obj_t *args) mp_raise_TypeError(MP_ERROR_TEXT("expecting a dict for keyword args")); } mp_map_t *map = &((mp_obj_dict_t *)MP_OBJ_TO_PTR(args[2]))->map; - th_args = m_new_obj_var(thread_entry_args_t, args, mp_obj_t, pos_args_len + 2 * map->used); - th_args->n_kw = map->used; + th_args = m_new_obj_var(thread_entry_args_t, args, mp_obj_t, pos_args_len + 2 * mp_map_len(map)); + th_args->n_kw = mp_map_len(map); // copy across the keyword arguments for (size_t i = 0, n = pos_args_len; i < map->alloc; ++i) { if (mp_map_slot_is_filled(map, i)) { diff --git a/py/mpconfig.h b/py/mpconfig.h index 0951651e7d6a3..09a7b96c62e7a 100644 --- a/py/mpconfig.h +++ b/py/mpconfig.h @@ -1625,6 +1625,21 @@ typedef time_t mp_timestamp_t; #define MICROPY_PY_COLLECTIONS_ORDEREDDICT (MICROPY_CONFIG_ROM_LEVEL_AT_LEAST_EXTRA_FEATURES) #endif +// Whether dicts use CPython-style ordered hash tables (dense array + sparse +// hash indices) that preserve insertion order, matching CPython 3.7+ semantics. +// When disabled, dicts use the original flat open-addressing hash table. +#ifndef MICROPY_PY_MAP_ORDERED +#define MICROPY_PY_MAP_ORDERED (MICROPY_CONFIG_ROM_LEVEL_AT_LEAST_EXTRA_FEATURES) +#endif + +// Whether to support ordered dicts with >65535 elements (uint32_t hash indices). +// Only meaningful when MICROPY_PY_MAP_ORDERED is enabled. +#if MICROPY_PY_MAP_ORDERED +#ifndef MICROPY_PY_MAP_LARGE +#define MICROPY_PY_MAP_LARGE (0) +#endif +#endif + // Whether to provide the _asdict function for namedtuple #ifndef MICROPY_PY_COLLECTIONS_NAMEDTUPLE__ASDICT #define MICROPY_PY_COLLECTIONS_NAMEDTUPLE__ASDICT (MICROPY_CONFIG_ROM_LEVEL_AT_LEAST_EVERYTHING) diff --git a/py/obj.h b/py/obj.h index 3c9122a69c0cc..08bd246d936e6 100644 --- a/py/obj.h +++ b/py/obj.h @@ -412,12 +412,21 @@ typedef struct _mp_rom_obj_t { mp_const_obj_t o; } mp_rom_obj_t; // These macros are used to define constant map/dict objects // You can put "static" in front of the definition to make it local +#if MICROPY_PY_MAP_ORDERED +#define _MP_MAP_FILLED_INIT(n) .filled = (n), +#define _MP_MAP_IS_ORDERED_INIT +#else +#define _MP_MAP_FILLED_INIT(n) +#define _MP_MAP_IS_ORDERED_INIT .is_ordered = 1, +#endif + #define MP_DEFINE_CONST_MAP(map_name, table_name) \ const mp_map_t map_name = { \ .all_keys_are_qstrs = 1, \ .is_fixed = 1, \ - .is_ordered = 1, \ + _MP_MAP_IS_ORDERED_INIT \ .used = MP_ARRAY_SIZE(table_name), \ + _MP_MAP_FILLED_INIT(MP_ARRAY_SIZE(table_name)) \ .alloc = MP_ARRAY_SIZE(table_name), \ .table = (mp_map_elem_t *)(mp_rom_map_elem_t *)table_name, \ } @@ -428,8 +437,9 @@ typedef struct _mp_rom_obj_t { mp_const_obj_t o; } mp_rom_obj_t; .map = { \ .all_keys_are_qstrs = 1, \ .is_fixed = 1, \ - .is_ordered = 1, \ + _MP_MAP_IS_ORDERED_INIT \ .used = n, \ + _MP_MAP_FILLED_INIT(n) \ .alloc = n, \ .table = (mp_map_elem_t *)(mp_rom_map_elem_t *)table_name, \ }, \ @@ -481,12 +491,24 @@ typedef struct _mp_rom_map_elem_t { typedef struct _mp_map_t { size_t all_keys_are_qstrs : 1; size_t is_fixed : 1; // if set, table is fixed/read-only and can't be modified + #if MICROPY_PY_MAP_ORDERED + size_t used : (4 * sizeof(size_t) - 1); // high-water mark in dense array + size_t filled : (4 * sizeof(size_t) - 1); // non-deleted entries (for O(1) len()) + #else size_t is_ordered : 1; // if set, table is an ordered array, not a hash map - size_t used : (8 * sizeof(size_t) - 3); + size_t used : (8 * sizeof(size_t) - 3); // number of live entries + #endif size_t alloc; mp_map_elem_t *table; } mp_map_t; +// Get the number of live entries in a map. +#if MICROPY_PY_MAP_ORDERED +#define mp_map_len(map) ((map)->filled) +#else +#define mp_map_len(map) ((map)->used) +#endif + // mp_set_lookup requires these constants to have the values they do typedef enum _mp_map_lookup_kind_t { MP_MAP_LOOKUP = 0, @@ -502,6 +524,7 @@ static inline bool mp_map_slot_is_filled(const mp_map_t *map, size_t pos) { void mp_map_init(mp_map_t *map, size_t n); void mp_map_init_fixed_table(mp_map_t *map, size_t n, const mp_obj_t *table); +void mp_map_init_copy(mp_map_t *map, const mp_map_t *src); void mp_map_deinit(mp_map_t *map); mp_map_elem_t *mp_map_lookup(mp_map_t *map, mp_obj_t index, mp_map_lookup_kind_t lookup_kind); void mp_map_clear(mp_map_t *map); diff --git a/py/objdict.c b/py/objdict.c index 692a7de42750f..987fdcc42be47 100644 --- a/py/objdict.c +++ b/py/objdict.c @@ -42,8 +42,9 @@ const mp_obj_dict_t mp_const_empty_dict_obj = { .map = { .all_keys_are_qstrs = 0, .is_fixed = 1, - .is_ordered = 1, + _MP_MAP_IS_ORDERED_INIT .used = 0, + _MP_MAP_FILLED_INIT(0) .alloc = 0, .table = NULL, } @@ -55,8 +56,13 @@ static mp_obj_t dict_update(size_t n_args, const mp_obj_t *args, mp_map_t *kwarg // the iteration is held in *cur and should be initialised with zero for the // first call. Will return NULL when no more elements are available. static mp_map_elem_t *dict_iter_next(mp_obj_dict_t *dict, size_t *cur) { - size_t max = dict->map.alloc; mp_map_t *map = &dict->map; + #if MICROPY_PY_MAP_ORDERED + // Ordered maps have entries dense in [0, used); no need to scan beyond. + size_t max = map->is_fixed ? map->alloc : map->used; + #else + size_t max = map->alloc; + #endif size_t i = *cur; for (; i < max; i++) { @@ -66,7 +72,7 @@ static mp_map_elem_t *dict_iter_next(mp_obj_dict_t *dict, size_t *cur) { } } - assert(map->used == 0 || i == max); + assert(mp_map_len(map) == 0 || i == max); return NULL; } @@ -115,7 +121,7 @@ mp_obj_t mp_obj_dict_make_new(const mp_obj_type_t *type, size_t n_args, size_t n mp_obj_t dict_out = mp_obj_new_dict(0); mp_obj_dict_t *dict = MP_OBJ_TO_PTR(dict_out); dict->base.type = type; - #if MICROPY_PY_COLLECTIONS_ORDEREDDICT + #if !MICROPY_PY_MAP_ORDERED && MICROPY_PY_COLLECTIONS_ORDEREDDICT if (type == &mp_type_ordereddict) { dict->map.is_ordered = 1; } @@ -133,9 +139,9 @@ static mp_obj_t dict_unary_op(mp_unary_op_t op, mp_obj_t self_in) { mp_obj_dict_t *self = MP_OBJ_TO_PTR(self_in); switch (op) { case MP_UNARY_OP_BOOL: - return mp_obj_new_bool(self->map.used != 0); + return mp_obj_new_bool(mp_map_len(&self->map) != 0); case MP_UNARY_OP_LEN: - return MP_OBJ_NEW_SMALL_INT(self->map.used); + return MP_OBJ_NEW_SMALL_INT(mp_map_len(&self->map)); #if MICROPY_PY_SYS_GETSIZEOF case MP_UNARY_OP_SIZEOF: { size_t sz = sizeof(*self) + sizeof(*self->map.table) * self->map.alloc; @@ -172,7 +178,7 @@ static mp_obj_t dict_binary_op(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_ if (mp_obj_is_type(rhs_in, &mp_type_dict)) { mp_obj_dict_t *rhs = MP_OBJ_TO_PTR(rhs_in); - if (o->map.used != rhs->map.used) { + if (mp_map_len(&o->map) != mp_map_len(&rhs->map)) { return mp_const_false; } @@ -262,15 +268,10 @@ static MP_DEFINE_CONST_FUN_OBJ_1(dict_clear_obj, dict_clear); mp_obj_t mp_obj_dict_copy(mp_obj_t self_in) { mp_check_self(mp_obj_is_dict_or_ordereddict(self_in)); mp_obj_dict_t *self = MP_OBJ_TO_PTR(self_in); - mp_obj_t other_out = mp_obj_new_dict(self->map.alloc); - mp_obj_dict_t *other = MP_OBJ_TO_PTR(other_out); + mp_obj_dict_t *other = m_new_obj(mp_obj_dict_t); other->base.type = self->base.type; - other->map.used = self->map.used; - other->map.all_keys_are_qstrs = self->map.all_keys_are_qstrs; - other->map.is_fixed = 0; - other->map.is_ordered = self->map.is_ordered; - memcpy(other->map.table, self->map.table, self->map.alloc * sizeof(mp_map_elem_t)); - return other_out; + mp_map_init_copy(&other->map, &self->map); + return MP_OBJ_FROM_PTR(other); } static MP_DEFINE_CONST_FUN_OBJ_1(dict_copy_obj, mp_obj_dict_copy); @@ -355,9 +356,21 @@ static mp_obj_t dict_popitem(mp_obj_t self_in) { mp_check_self(mp_obj_is_dict_or_ordereddict(self_in)); mp_obj_dict_t *self = MP_OBJ_TO_PTR(self_in); mp_ensure_not_fixed(self); - if (self->map.used == 0) { + if (mp_map_len(&self->map) == 0) { mp_raise_msg(&mp_type_KeyError, MP_ERROR_TEXT("popitem(): dictionary is empty")); } + #if MICROPY_PY_MAP_ORDERED + // Scan backward from the high-water mark to find the last live entry (LIFO). + size_t cur = self->map.used; + while (cur > 0) { + --cur; + if (mp_map_slot_is_filled(&self->map, cur)) { + break; + } + } + mp_map_elem_t *next = &self->map.table[cur]; + assert(mp_map_slot_is_filled(&self->map, cur)); + #else size_t cur = 0; #if MICROPY_PY_COLLECTIONS_ORDEREDDICT if (self->map.is_ordered) { @@ -367,9 +380,19 @@ static mp_obj_t dict_popitem(mp_obj_t self_in) { mp_map_elem_t *next = dict_iter_next(self, &cur); assert(next); self->map.used--; - mp_obj_t items[] = {next->key, next->value}; - next->key = MP_OBJ_SENTINEL; // must mark key as sentinel to indicate that it was deleted + #endif + mp_obj_t key = next->key; + #if MICROPY_PY_MAP_ORDERED + // Delegate deletion to mp_map_lookup so tombstone/compact logic is in one place. + mp_map_elem_t *elem = mp_map_lookup(&self->map, key, MP_MAP_LOOKUP_REMOVE_IF_FOUND); + mp_obj_t items[] = {key, elem->value}; + elem->value = MP_OBJ_NULL; + #else + mp_obj_t items[] = {key, next->value}; + next->key = MP_OBJ_SENTINEL; next->value = MP_OBJ_NULL; + #endif + mp_obj_t tuple = mp_obj_new_tuple(2, items); return tuple; @@ -381,7 +404,7 @@ static mp_obj_t dict_update(size_t n_args, const mp_obj_t *args, mp_map_t *kwarg mp_obj_dict_t *self = MP_OBJ_TO_PTR(args[0]); mp_ensure_not_fixed(self); - mp_arg_check_num(n_args, kwargs->used, 1, 2, true); + mp_arg_check_num(n_args, mp_map_len(kwargs), 1, 2, true); if (n_args == 2) { // given a positional argument @@ -654,7 +677,7 @@ mp_obj_t mp_obj_new_dict(size_t n_args) { size_t mp_obj_dict_len(mp_obj_t self_in) { mp_obj_dict_t *self = MP_OBJ_TO_PTR(self_in); - return self->map.used; + return mp_map_len(&self->map); } mp_obj_t mp_obj_dict_store(mp_obj_t self_in, mp_obj_t key, mp_obj_t value) { diff --git a/py/objnamedtuple.c b/py/objnamedtuple.c index e8447ee31ef08..0bca67f3d37f3 100644 --- a/py/objnamedtuple.c +++ b/py/objnamedtuple.c @@ -51,7 +51,9 @@ static mp_obj_t namedtuple_asdict(mp_obj_t self_in) { // make it an OrderedDict mp_obj_dict_t *dictObj = MP_OBJ_TO_PTR(dict); dictObj->base.type = &mp_type_ordereddict; + #if !MICROPY_PY_MAP_ORDERED dictObj->map.is_ordered = 1; + #endif for (size_t i = 0; i < self->tuple.len; ++i) { mp_obj_dict_store(dict, MP_OBJ_NEW_QSTR(fields[i]), self->tuple.items[i]); } diff --git a/py/runtime.c b/py/runtime.c index 618e9b5ae41cf..34fe471be46b6 100644 --- a/py/runtime.c +++ b/py/runtime.c @@ -874,7 +874,7 @@ void mp_call_prepare_args_n_kw_var(bool have_self, size_t n_args_n_kw, const mp_ // dictionary mp_map_t *map = mp_obj_dict_get_map(kw_value); // should have enough, since kw_dict_len is in this case hinted correctly above - assert(args2_len + 2 * map->used <= args2_alloc); + assert(args2_len + 2 * mp_map_len(map) <= args2_alloc); for (size_t j = 0; j < map->alloc; j++) { if (mp_map_slot_is_filled(map, j)) { // the key must be a qstr, so intern it if it's a string diff --git a/tests/basics/dict_compact_add.py b/tests/basics/dict_compact_add.py new file mode 100644 index 0000000000000..7f7b8bb91f386 --- /dev/null +++ b/tests/basics/dict_compact_add.py @@ -0,0 +1,87 @@ +# Check if dicts preserve insertion order (MICROPY_PY_MAP_ORDERED). +if list({2: 0, 1: 0, 3: 0}.keys()) != [2, 1, 3]: + print("SKIP") + raise SystemExit + + +# Test that in-place compact triggered by add (dense array full with tombstones) +# correctly preserves all entries and allows the new add to succeed. + +# Fill a dict, delete entries to create tombstones, then add enough new entries +# to trigger compact-on-add (wrap-around in hash probe). Verify all entries. +d = {} +for i in range(10): + d[i] = i * 10 + +# Delete half the entries (creates tombstones in dense array). +for i in range(0, 10, 2): + del d[i] + +# Add new entries. Eventually the dense array fills with tombstones + live entries, +# triggering in-place compact on wrap-around instead of rehash. +for i in range(100, 120): + d[i] = i * 10 + +# Verify all expected keys are present with correct values. +for i in range(1, 10, 2): + assert d[i] == i * 10, "original key {} has wrong value".format(i) +for i in range(100, 120): + assert d[i] == i * 10, "new key {} has wrong value".format(i) + +# Verify deleted keys are gone. +for i in range(0, 10, 2): + assert i not in d, "deleted key {} still present".format(i) + +print("compact-on-add: OK") +print("len:", len(d)) + +# Test insertion order is preserved after compact-on-add. +keys = list(d.keys()) +expected = [1, 3, 5, 7, 9] + list(range(100, 120)) +assert keys == expected, "order wrong: {}".format(keys) +print("order: OK") + + +# Test with non-qstr keys (tuples have __hash__ via mp_unary_op). +d2 = {} +for i in range(8): + d2[(i, "x")] = i + +for i in range(0, 8, 2): + del d2[(i, "x")] + +for i in range(100, 110): + d2[(i, "x")] = i + +# Verify all entries. +for i in range(1, 8, 2): + assert d2[(i, "x")] == i +for i in range(100, 110): + assert d2[(i, "x")] == i + +print("non-qstr keys: OK") + + +# Test hash index rebuild correctness after compact. +# After compact, all probe chains in the hash index are rebuilt from scratch. +# Use enough entries that the hash index is stressed across multiple probe steps. +d3 = {} +for i in range(12): + d3[i] = i + +for i in [0, 1, 2, 3]: + del d3[i] + +# Adding 10 more entries forces the dense array to fill and triggers compact. +# The rebuilt hash index must correctly resolve all existing keys. +for i in range(20, 30): + d3[i] = i + +for i in range(4, 12): + assert d3[i] == i, "key {} not found after hash rebuild".format(i) +for i in range(20, 30): + assert d3[i] == i, "new key {} not found after hash rebuild".format(i) +for i in range(0, 4): + assert i not in d3 + +print("hash index rebuild: OK") diff --git a/tests/basics/dict_popitem_ordered.py b/tests/basics/dict_popitem_ordered.py new file mode 100644 index 0000000000000..7dcbaed41880f --- /dev/null +++ b/tests/basics/dict_popitem_ordered.py @@ -0,0 +1,55 @@ +# Check if dicts preserve insertion order (MICROPY_PY_MAP_ORDERED). +if list({2: 0, 1: 0, 3: 0}.keys()) != [2, 1, 3]: + print("SKIP") + raise SystemExit + + +# Test dict.popitem() returns items in LIFO order and leaves the dict consistent. + +# Pop entire dict, verify all entries are returned in LIFO order. +d = {} +for i in range(5): + d[i] = i * 10 +items = [] +while d: + items.append(d.popitem()) +print("all items:", items) + +# Interleaved popitem and add: verify remaining dict is consistent. +d = {1: 10, 2: 20, 3: 30} +print(d.popitem()) +d[4] = 40 +d[5] = 50 +print(d.popitem()) +print(d.popitem()) +print("remaining:", sorted(d.items())) + +# popitem after del (mixed operations). +d = {} +for i in range(8): + d[i] = i +del d[3] +del d[5] +# Remaining: 0,1,2,4,6,7 -- popitem should pop from end. +p1 = d.popitem() +p2 = d.popitem() +print("after del pops:", p1, p2) +# Verify remaining keys are accessible. +for k in list(d.keys()): + assert d[k] == k, "key {} has wrong value after mixed ops".format(k) +print("remaining keys:", sorted(d.keys())) + +# Pop everything after deletes. +d = {1: 10, 2: 20, 3: 30, 4: 40, 5: 50} +del d[2] +del d[4] +items = [] +while d: + items.append(d.popitem()) +print("pop-all after del:", items) + +# Verify popitem on empty dict raises KeyError. +try: + {}.popitem() +except KeyError: + print("empty popitem: KeyError") diff --git a/tests/basics/slice_optimise.py b/tests/basics/slice_optimise.py index f663e16b8c2f9..4d878835ef345 100644 --- a/tests/basics/slice_optimise.py +++ b/tests/basics/slice_optimise.py @@ -16,8 +16,17 @@ except KeyError as e: print("KeyError", e.args) -# Put a slice and another object into an OrderedDict, and retrieve them. +# Slice-as-key in OrderedDict: when backed by the ordered hash table +# (MICROPY_PY_MAP_ORDERED=1) slices are not hashable so TypeError is raised. +# When backed by linear scan (MAP_ORDERED=0) slices work as keys. x = OrderedDict() -x[:"a"] = 1 -x["b"] = 2 -print(list(x.keys()), list(x.values())) +try: + x[:"a"] = 1 + x["b"] = 2 + # Linear scan path: verify keys and values are correct. + assert list(x.keys()) == [slice(None, "a", None), "b"] + assert list(x.values()) == [1, 2] + print("slice key OK") +except TypeError: + # Hash table path: slices not hashable, this is expected. + print("slice key TypeError") diff --git a/tests/basics/slice_optimise.py.exp b/tests/basics/slice_optimise.py.exp index 3fa59aae15ae6..70cade87feeb2 100644 --- a/tests/basics/slice_optimise.py.exp +++ b/tests/basics/slice_optimise.py.exp @@ -1,2 +1,2 @@ KeyError (slice(None, None, None),) -[slice(None, 'a', None), 'b'] [1, 2] +slice key TypeError diff --git a/tests/micropython/dict_compact_empty.py b/tests/micropython/dict_compact_empty.py new file mode 100644 index 0000000000000..c80c80259a88d --- /dev/null +++ b/tests/micropython/dict_compact_empty.py @@ -0,0 +1,38 @@ +# Test that deleting the last element doesn't cause issues. +# Compaction is skipped when dict becomes empty (filled == 0). + +# Test 1: Delete all entries one by one +d = {1: "a", 2: "b", 3: "c"} +del d[1] +del d[2] +del d[3] +print(len(d)) + +# Dict should work normally after being emptied +d[10] = "x" +print(d[10]) +print(len(d)) + +# Test 2: Single element dict +d2 = {42: "only"} +del d2[42] +print(len(d2)) +d2[100] = "new" +print(d2[100]) + +# Test 3: Empty via popitem +d3 = {"a": 1, "b": 2} +d3.popitem() +d3.popitem() +print(len(d3)) +d3["c"] = 3 +print(d3["c"]) + +# Test 4: Repeated empty/fill cycles +d4 = {} +for cycle in range(5): + for i in range(10): + d4[i] = cycle + for i in range(10): + del d4[i] +print("cycles OK, len:", len(d4)) diff --git a/tests/micropython/dict_compact_empty.py.exp b/tests/micropython/dict_compact_empty.py.exp new file mode 100644 index 0000000000000..b62250e167d90 --- /dev/null +++ b/tests/micropython/dict_compact_empty.py.exp @@ -0,0 +1,8 @@ +0 +x +1 +0 +new +0 +3 +cycles OK, len: 0 diff --git a/tests/micropython/dict_compact_order.py b/tests/micropython/dict_compact_order.py new file mode 100644 index 0000000000000..2f8e30cc96329 --- /dev/null +++ b/tests/micropython/dict_compact_order.py @@ -0,0 +1,44 @@ +# Check if dicts preserve insertion order (MICROPY_PY_MAP_ORDERED). +if list({2: 0, 1: 0, 3: 0}.keys()) != [2, 1, 3]: + print("SKIP") + raise SystemExit + + +# Test that dict ordering is preserved after compaction. +# When tombstones exceed 50% of live entries, the dict compacts. +# This must preserve insertion order. + +d = {} +for i in range(100): + d[i] = i + +# Delete 67 entries: filled=33, tombstones=67 +# Threshold = 33/2 = 16, and 67 > 16, so compaction triggers +for i in range(67): + del d[i] + +# Remaining keys must be in insertion order +print(list(d.keys())) + +# Values should also be correct (spot check) +print(d[67], d[80], d[99]) + +# Dict should still be usable after compaction +d[200] = 200 +d[201] = 201 +keys = list(d.keys()) +print(keys[-3], keys[-2], keys[-1]) + +# Test with mixed key types (strings and ints) +d2 = {} +for i in range(20): + d2[i] = i + d2["key" + str(i)] = "val" + str(i) + +# Delete enough to trigger compaction +for i in range(15): + del d2[i] + del d2["key" + str(i)] + +# Remaining should preserve insertion order +print(list(d2.keys())) diff --git a/tests/micropython/dict_compact_order.py.exp b/tests/micropython/dict_compact_order.py.exp new file mode 100644 index 0000000000000..a4e484fe74e98 --- /dev/null +++ b/tests/micropython/dict_compact_order.py.exp @@ -0,0 +1,4 @@ +[67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99] +67 80 99 +99 200 201 +[15, 'key15', 16, 'key16', 17, 'key17', 18, 'key18', 19, 'key19'] diff --git a/tests/micropython/dict_compact_stress.py b/tests/micropython/dict_compact_stress.py new file mode 100644 index 0000000000000..66f012110b234 --- /dev/null +++ b/tests/micropython/dict_compact_stress.py @@ -0,0 +1,56 @@ +# Stress test for dict compaction. +# Without compaction, repeated add/delete cycles would cause unbounded +# memory growth from tombstone accumulation. + +try: + import gc +except ImportError: + print("SKIP") + raise SystemExit + +d = {} + +# Run many add/delete cycles +# Without compaction, this would accumulate 50 * 100 = 5000 tombstones +for cycle in range(50): + # Add entries + for i in range(100): + d[i] = i * cycle + + # Delete all entries + for i in range(100): + del d[i] + +# If we got here without MemoryError, basic compaction is working +print("pass1") + +# Test with popitem too +d2 = {} +for cycle in range(50): + for i in range(100): + d2[i] = i + for _ in range(100): + d2.popitem() + +print("pass2") + +# Test mixed operations +d3 = {} +for cycle in range(30): + # Add 200 entries + for i in range(200): + d3[i] = i + + # Delete half via del + for i in range(100): + del d3[i] + + # Delete rest via popitem + for _ in range(100): + d3.popitem() + +print("pass3") + +# Force GC and verify we haven't leaked too much memory +gc.collect() +print("pass4") diff --git a/tests/micropython/dict_compact_stress.py.exp b/tests/micropython/dict_compact_stress.py.exp new file mode 100644 index 0000000000000..0d208e1950ebf --- /dev/null +++ b/tests/micropython/dict_compact_stress.py.exp @@ -0,0 +1,4 @@ +pass1 +pass2 +pass3 +pass4 diff --git a/tests/micropython/dict_compact_threshold.py b/tests/micropython/dict_compact_threshold.py new file mode 100644 index 0000000000000..02bc7d61f2ec8 --- /dev/null +++ b/tests/micropython/dict_compact_threshold.py @@ -0,0 +1,46 @@ +# Check if dicts preserve insertion order (MICROPY_PY_MAP_ORDERED). +if list({2: 0, 1: 0, 3: 0}.keys()) != [2, 1, 3]: + print("SKIP") + raise SystemExit + +# Test compaction threshold boundary. +# Compaction triggers when: tombstones > filled / 2 + +# Start with 30 entries +d = {} +for i in range(30): + d[i] = i + +# Delete entries one by one and verify dict remains consistent +for i in range(25): + del d[i] + # Verify remaining entries are correct + expected = list(range(i + 1, 30)) + if list(d.keys()) != expected: + print("FAIL at deletion", i) + break +else: + print("sequential delete OK") + +# Verify values are still correct after deletions (spot check) +d2 = {} +for i in range(50): + d2[i] = i * 2 +for i in range(40): + del d2[i] + +print(list(d2.keys())) +print(d2[40], d2[45], d2[49]) + +# Test with different deletion patterns (non-sequential) +d3 = {} +for i in range(100): + d3[i] = i +# Delete every other entry +for i in range(0, 100, 2): + del d3[i] + +# Should have odd numbers in order +keys = list(d3.keys()) +print(keys[0], keys[1], keys[2], keys[3], keys[4], keys[5], keys[6], keys[7], keys[8], keys[9]) +print(keys[-5], keys[-4], keys[-3], keys[-2], keys[-1]) diff --git a/tests/micropython/dict_compact_threshold.py.exp b/tests/micropython/dict_compact_threshold.py.exp new file mode 100644 index 0000000000000..e2c399b5c3d0e --- /dev/null +++ b/tests/micropython/dict_compact_threshold.py.exp @@ -0,0 +1,5 @@ +sequential delete OK +[40, 41, 42, 43, 44, 45, 46, 47, 48, 49] +80 90 98 +1 3 5 7 9 11 13 15 17 19 +91 93 95 97 99 diff --git a/tests/micropython/heapalloc_dict.py b/tests/micropython/heapalloc_dict.py new file mode 100644 index 0000000000000..3243159916a4b --- /dev/null +++ b/tests/micropython/heapalloc_dict.py @@ -0,0 +1,73 @@ +# Test dict operations under heap lock. +# The ordered hash table implementation must handle delete+add cycles +# without requiring heap allocation (rehash) by compacting in-place. + +try: + import micropython +except ImportError: + print("SKIP") + raise SystemExit + +# Check for stackless build, which can't call functions without +# allocating a frame on heap. +try: + + def stackless(): + pass + + micropython.heap_lock() + stackless() + micropython.heap_unlock() +except RuntimeError: + print("SKIP") + raise SystemExit + +# Test 1: delete+add cycles under heap lock. +d = {} +for i in range(6): + d[i] = i + +micropython.heap_lock() +try: + for i in range(6, 20): + del d[i - 6] + d[i] = i + print("dict delete+add cycles: OK") +except MemoryError: + print("dict delete+add cycles: MemoryError") +micropython.heap_unlock() + +# Verify contents are correct after heap-locked cycles. +print("post-lock contents:", sorted(d.keys()), sorted(d.values())) + +# Test 2: Same with smaller dict. +gd = {} +for i in range(4): + gd[i] = i + +micropython.heap_lock() +try: + for i in range(4, 12): + del gd[i - 4] + gd[i] = i + print("small dict cycles: OK") +except MemoryError: + print("small dict cycles: MemoryError") +micropython.heap_unlock() + +# Test 3: Delete all entries then add under heap lock (all-tombstone compact). +d3 = {} +for i in range(6): + d3[i] = i +micropython.heap_lock() +try: + for i in range(6): + del d3[i] + # Dict is now empty but dense array is full of tombstones. + # Adding should trigger in-place compact (filled=0 < used=6), then succeed. + d3["new"] = 99 + print("all-tombstone add: OK") +except MemoryError: + print("all-tombstone add: MemoryError") +micropython.heap_unlock() +print("after all-tombstone:", list(d3.keys()), list(d3.values())) diff --git a/tests/micropython/heapalloc_dict.py.exp b/tests/micropython/heapalloc_dict.py.exp new file mode 100644 index 0000000000000..64cb192569bfe --- /dev/null +++ b/tests/micropython/heapalloc_dict.py.exp @@ -0,0 +1,5 @@ +dict delete+add cycles: OK +post-lock contents: [14, 15, 16, 17, 18, 19] [14, 15, 16, 17, 18, 19] +small dict cycles: OK +all-tombstone add: OK +after all-tombstone: ['new'] [99] diff --git a/tests/micropython/perf_dict_ops.py b/tests/micropython/perf_dict_ops.py new file mode 100644 index 0000000000000..af74442ad823d --- /dev/null +++ b/tests/micropython/perf_dict_ops.py @@ -0,0 +1,105 @@ +# Benchmark dict operations: creation, lookup, insert, delete, iteration. + +# Dict sizes to test +SMALL = 10 +MEDIUM = 100 +LARGE = 500 + + +def bm_setup(params): + import time + + N = params[0] + ITERS = params[1] + return N, ITERS, time.ticks_us, time.ticks_diff + + +def bm_run(N, ITERS, ticks_us, ticks_diff): + # 1. Dict creation (comprehension) + t0 = ticks_us() + for _ in range(ITERS): + d = {i: i for i in range(MEDIUM)} + t1 = ticks_us() + print("create_100:", ticks_diff(t1, t0)) + + # 2. Insertion into empty dict + t0 = ticks_us() + for _ in range(ITERS): + d = {} + for i in range(MEDIUM): + d[i] = i + t1 = ticks_us() + print("insert_100:", ticks_diff(t1, t0)) + + # 3. Lookup hit (qstr keys -- simulates attribute access) + d = {} + keys = [] + for i in range(MEDIUM): + k = "key_{}".format(i) + d[k] = i + keys.append(k) + t0 = ticks_us() + for _ in range(ITERS): + for k in keys: + d[k] + t1 = ticks_us() + print("lookup_qstr_100:", ticks_diff(t1, t0)) + + # 4. Lookup hit (int keys) + d = {i: i for i in range(MEDIUM)} + t0 = ticks_us() + for _ in range(ITERS): + for i in range(MEDIUM): + d[i] + t1 = ticks_us() + print("lookup_int_100:", ticks_diff(t1, t0)) + + # 5. Lookup miss + d = {i: i for i in range(MEDIUM)} + t0 = ticks_us() + for _ in range(ITERS): + for i in range(MEDIUM, MEDIUM * 2): + i in d + t1 = ticks_us() + print("lookup_miss_100:", ticks_diff(t1, t0)) + + # 6. Iteration + d = {i: i for i in range(MEDIUM)} + t0 = ticks_us() + for _ in range(ITERS * 10): + for k in d: + pass + t1 = ticks_us() + print("iterate_100:", ticks_diff(t1, t0)) + + # 7. Delete + add cycles (tombstone/compact stress) + d = {i: i for i in range(MEDIUM)} + t0 = ticks_us() + for _ in range(ITERS): + for i in range(MEDIUM): + del d[i] + d[i + MEDIUM] = i + for i in range(MEDIUM): + del d[i + MEDIUM] + d[i] = i + t1 = ticks_us() + print("del_add_cycle_100:", ticks_diff(t1, t0)) + + # 8. popitem drain + t0 = ticks_us() + for _ in range(ITERS): + d = {i: i for i in range(MEDIUM)} + while d: + d.popitem() + t1 = ticks_us() + print("popitem_drain_100:", ticks_diff(t1, t0)) + + # 9. Global variable access (most critical path) + # This is measured at module level below + pass + + +bm_params = { + (1000, 100): (MEDIUM, 100), + (5000, 100): (MEDIUM, 500), +} diff --git a/tools/cc1 b/tools/cc1 index aa2534f01e7bb..901f8b707b156 100755 --- a/tools/cc1 +++ b/tools/cc1 @@ -44,10 +44,10 @@ print_debug = False re_preproc_line = re.compile(r"# [0-9]+ ") re_map_entry = re.compile(r"\{.+?\(MP_QSTR_([A-Za-z0-9_]+)\).+\},") re_mp_obj_dict_t = re.compile( - r"(?P
(static )?const mp_obj_dict_t (?P