Yet another blog, by yet another person.
Random stuff about Python, Java, Scala, and other random stuff.
This entry follows the evolution of a somewhat naïve implementation of a dictionary which normalizes its keys or values, or both.
The main requirement is that the implementation should preserve the standard Python dict interface (including those for constructors and factories) and avoid re-implementing anything that is already provided by the standard dict.
The first version was a simple normalized key map (doc-strings removed):
class NormalizedKeyMap(dict):
def __init__(self, normalize_key, init=None, **kwargs):
from itertools import chain
self.__normalize_key = normalize_key
if init is None:
dict.__init__(self, ((self.__normalize_key(k), v)
for k, v in kwargs.iteritems()))
elif isinstance(init, dict):
dict.__init__(self, ((self.__normalize_key(k), v)
for k, v in chain(init.iteritems(), kwargs.iteritems())))
else: # assumes iterable (k, v) pair
dict.__init__(self, ((self.__normalize_key(k), v)
for k, v in chain(init, kwargs.iteritems())))
def __setitem__(self, key, value):
dict.__setitem__(self, self.__normalize_key(key), value)
def __getitem__(self, key):
return dict.__getitem__(self, self.__normalize_key(key))
def __delitem__(self, key):
dict.__delitem__(self, self.__normalize_key(key))
The second version normalized both keys and values (see below; docstrings removed):
class NormalizedKeyValueMap(dict):
def __init__(self, normalize_key, normalize_value, init=None, **kwargs):
from itertools import chain
self.__normalize_key = normalize_key
self.__normalize_value = normalize_value
if init is None:
dict.__init__(self, ((self.__normalize_key(k), self.__normalize_value(v))
for k, v in kwargs.iteritems()))
elif isinstance(init, dict):
dict.__init__(self, ((self.__normalize_key(k), self.__normalize_value(v))
for k, v in chain(init.iteritems(), kwargs.iteritems())))
else: # assumes iterable (k, v) pair
dict.__init__(self, ((self.__normalize_key(k), self.__normalize_value(v))
for k, v in chain(init, kwargs.iteritems())))
def __setitem__(self, key, value):
dict.__setitem__(self, self.__normalize_key(key), self.__normalize_value(value))
def __getitem__(self, key):
return dict.__getitem__(self, self.__normalize_key(key))
def __delitem__(self, key):
dict.__delitem__(self, self.__normalize_key(key))
These implementations made all the normalization functions it needed as required parameters to the constructor. This changes the interface for the constructor considerably.
Furthermore, this does not support all the dictionary operations like in, has_key, pop, setdefault, and update().
The next version combines the above two into a single class with optional normalization functions. Now, the general interface is similar to that of the standard dict, except for the special keyword arguments “normalize_key” and “normalize_value”. It also adds additional methods to provide the missing functionality.
Following is the final version (docstrings removed):
class NormalizedMap(dict):
def __init__(self, init=None, **kwargs):
self.__normalize_key = kwargs.pop("normalize_key", lambda k: k)
self.__normalize_value = kwargs.pop("normalize_value", lambda v: v)
init_items = NormalizedMap.__merge_items(init, **kwargs)
dict.__init__(self, ((self.__normalize_key(k), self.__normalize_value(v)) for k, v in init_items))
@staticmethod
def __merge_items(iterable, **kwargs):
from itertools import chain
if iterable is None:
merged_items = kwargs.iteritems()
elif isinstance(iterable, dict):
merged_items = chain(iterable.iteritems(), kwargs.iteritems())
else: # assumes iterable (k, v) pair
merged_items = chain(iterable, kwargs.iteritems())
return merged_items
def __setitem__(self, key, value):
dict.__setitem__(self, self.__normalize_key(key), self.__normalize_value(value))
def __getitem__(self, key):
return dict.__getitem__(self, self.__normalize_key(key))
def __delitem__(self, key):
dict.__delitem__(self, self.__normalize_key(key))
def __contains__(self, key):
return dict.__contains__(self, self.__normalize_key(key))
def has_key(self, key):
return self.__contains__(key)
def pop(self, key, *args):
return dict.pop(self, self.__normalize_key(key), *args)
def setdefault(self, key, defaultval=None):
return dict.setdefault(self, self.__normalize_key(key),
(None if defaultval is None else self.__normalize_value(defaultval)))
def update(self, iterable=None, **kwargs):
dict.update(self, ((self.__normalize_key(k), self.__normalize_value(v))
for k, v in NormalizedMap.__merge_items(iterable, **kwargs)))
The missing functionality for in, has_key, pop, and setdefault were added through simple method overrides with normalization to the key and value (as needed).
A quick test transcript for NormalizedMap follows:
>>> nm = NormalizedMap({"rat": 'A', "Rat": 'E', "maT": 'I'}, normalize_key=lambda x: x.lower())
>>> nm
{'rat': 'E', 'mat': 'I'}
>>> nm = NormalizedMap({"rat": 'A', "Rat": 'B', "maT": 'C'}, RAT='D', lat='E', normalize_key=lambda x: x.lower(), normalize_value=lambda x: x.lower())
>>> nm
{'lat'='e', 'rat': 'd', 'mat': 'c'}
>>> nm = NormalizedMap({"rat": 'A', "Rat": 'B', "maT": 'C'}, RAT='D', lat='E', normalize_key=lambda x: x.lower())
>>> nm
{'lat'='E', 'rat': 'D', 'mat': 'C'}
>>> nm = NormalizedMap({"rat": 'A', "Rat": 'B', "maT": 'C'}, RAT='D', lat='E', normalize_value=lambda x: x.lower())
>>> nm
{'lat': 'e', 'rat': 'a', 'RAT': 'd', 'Rat': 'b', 'maT': 'c'}
>>> nm = NormalizedMap((("rat", 'A'), ("Rat", 'B'), ("maT", 'C')), normalize_key=lambda x: x.lower())
>>> nm
{'rat': 'B', 'mat': 'C'}
>>> nm = NormalizedMap(rat='A', RAT='B', normalize_key=lambda x: x.lower(), normalize_value=lambda x: x.lower())
>>> nm
{'rat': 'a'}
>>> nm = NormalizedMap(normalize_key=lambda x: x.lower(), normalize_value=lambda x: x.lower())
>>> nm
{}
>>> nm["Rat"] = "K"
>>> nm
{'rat': 'k'}
>>> nm["RAt"] = "M"
>>> nm
{'rat': 'm'}
>>> nm["RAT"]
'm'
>>> del(nm["raT"])
>>> nm
{}
>>> nm = NormalizedMap({"rat": 'A', "Rat": 'Tar', "maT": 'I'}, normalize_key=lambda x: x.lower(), normalize_value=lambda x: x.lower())
>>> nm
{'rat': 'tar', 'mat': 'i'}
>>> 'RAT' in nm
True
>>> nm.has_key('RAT')
True
>>> nm.pop('rAt', 'Eeek')
tar
>>> nm.update(MAT='UU', Rat='EE')
>>> nm
{'rat': 'ee', 'mat': 'uu'}
>>> nm.update((('fat', 'Sat'), ('mAt', 'LuX')), FAT='Mon')
>>> nm
{'rat': 'ee', 'mat': 'lux', 'fat': 'mon'}
>>> nm.setdefault('FAT', '777')
'mon'
>>> nm
{'rat': 'ee', 'mat': 'lux', 'fat': 'mon'}
>>> nm.setdefault('dUn', 'bEe')
'bee'
>>> nm
{'rat': 'ee', 'dun': 'bee', 'mat': 'lux', 'fat': 'mon'}
As the title of this entry indicates, this is a naïve implementation, and there are much better implementations out there, especially when performance is concerned.
For instance, consider Fuzzyman (Michael Foord)'s recipie at ASPN for a dictionary with case-insensitive keys (one of the specific cases of normalized dictionaries). This implementation is more complete than the NormalizedMap given above.
The implementation above uses Python 2.5-specific language features.
Labels: normalized dictionary, normalized map, python
Posted by FiniteState42i @11:21 | Permanent Link
Subscribe to
Posts [Atom]