Initial code commit - beta.

This commit is contained in:
darko-poljak 2013-10-18 19:29:34 +02:00
parent eaffb4d8ad
commit c49e9d1f54
8 changed files with 843 additions and 5 deletions

View file

@ -652,7 +652,7 @@ Also add information on how to contact you by electronic and paper mail.
If the program does terminal interaction, make it output a short
notice like this when it starts in an interactive mode:
anderssontree Copyright (C) 2013 Darko Poljak
aatree Copyright (C) 2013 Darko Poljak
This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.

View file

@ -1,4 +0,0 @@
anderssontree
=============
Andersson Tree Python module.

88
README.rst Normal file
View file

@ -0,0 +1,88 @@
AATree Package
===================
Abstract
========
This package provides an Andersson tree (AA tree) implementation written in pure Python.
Sources of Algorithms
---------------------
http://en.wikipedia.org/wiki/Andersson_tree
http://user.it.uu.se/~arnea/abs/simp.html
http://eternallyconfuzzled.com/tuts/datastructures/jsw_tut_andersson.aspx
Some concepts are inspired by bintrees package at
http://bitbucket.org/mozman/bintrees, although this implementation does not
support dict, heap, set compatibility.
Constructor
~~~~~~~~~~~
* AnderssonTree() -> new empty tree;
* AnderssonTree(mapping) -> new tree initialized from a mapping (requires only an items() method)
* AnderssonTree(seq) -> new tree initialized from seq [(k1, v1), (k2, v2), ... (kn, vn)]
Methods
~~~~~~~
* __contains__(k) -> True if T has a key k, else False
* __delitem__(y) <==> del T[y]
* __getitem__(y) <==> T[y]
* __iter__() <==> iter(T) <==> keys()
* __len__() <==> len(T)
* __repr__() <==> repr(T)
* __reversed__() <==> reversed(T), reversed keys
* __setitem__(k, v) <==> T[k] = v
* __copy__() <==> copy()
* clear() -> None, remove all items from T
* copy() -> a shallow copy of T, tree structure, i.e. key insertion order is preserved
* dump([order]) -> None, dumps tree according to order
* get(k) -> T[k] if k in T, else None
* insert(k, v) -> None, insert node with key k and value v, replace value if key exists
* is_empty() -> True if len(T) == 0
* iter_items([reverse]) -> generator for (k, v) items of T
* keys([reverse]) -> generator for keys of T
* remove(key) -> None, remove item by key
* remove_items(keys) -> None, remove items by keys
* root() -> root node
* traverse(f, [order]) -> visit all nodes of tree according to order and call f(node) for each node
* update(E) -> None. Update T from dict/iterable E
* values([reverse]) -> generator for values of T
Order values
~~~~~~~~~~~~
* ORDER_INFIX_LEFT_RIGHT - infix order, left child first, then right
* ORDER_INFIX_RIGHT_LEFT - infix order, right child first, then left
* ORDER_PREFIX_LEFT_RIGHT - prefix order, left child first, then right
* ORDER_PREFIX_RIGHT_LEFT - prefix order, right child first, then left
* ORDER_POSTFIX_LEFT_RIGHT - postfix order, left child first, then right
* ORDER_POSTFIX_RIGHT_LEFT - postfix order, right child first, then left
Installation
============
from source::
python setup.py install
or from PyPI::
pip install anderssontree
Documentation
=============
this README.rst, code itself, docstrings
anderssontree can be found on github.com at:
https://github.com/darko-poljak/anderssontree
Tested With
===========
Python2.7.5, Python3.3.2

12
anderssontree/__init__.py Normal file
View file

@ -0,0 +1,12 @@
from __future__ import absolute_import
# Names re-exported at package level: the two tree class aliases and the
# six traversal-order constants (imported below from .anderssontree).
__all__ = ['AATree', 'AnderssonTree', 'ORDER_INFIX_LEFT_RIGHT',
'ORDER_INFIX_RIGHT_LEFT', 'ORDER_PREFIX_LEFT_RIGHT',
'ORDER_PREFIX_RIGHT_LEFT', 'ORDER_POSTFIX_LEFT_RIGHT',
'ORDER_POSTFIX_RIGHT_LEFT'
]
from .anderssontree import AATree, AnderssonTree, ORDER_INFIX_LEFT_RIGHT, \
ORDER_INFIX_RIGHT_LEFT, ORDER_PREFIX_LEFT_RIGHT, ORDER_PREFIX_RIGHT_LEFT, \
ORDER_POSTFIX_LEFT_RIGHT, ORDER_POSTFIX_RIGHT_LEFT

View file

@ -0,0 +1,407 @@
#!/usr/bin/env python
# Author: Darko Poljak <darko.poljak@gmail.com>
# License: GPLv3
# Public API of this module: the two tree class aliases and the
# traversal-order constants.
__all__ = [
'AATree', 'AnderssonTree', 'ORDER_INFIX_LEFT_RIGHT',
'ORDER_INFIX_RIGHT_LEFT', 'ORDER_PREFIX_LEFT_RIGHT',
'ORDER_PREFIX_RIGHT_LEFT', 'ORDER_POSTFIX_LEFT_RIGHT',
'ORDER_POSTFIX_RIGHT_LEFT'
]
# Traversal orders accepted by the tree's ``traverse`` and ``dump`` methods.
# Each family (infix / prefix / postfix) has a left-child-first and a
# right-child-first variant.
ORDER_INFIX_LEFT_RIGHT, ORDER_INFIX_RIGHT_LEFT = 0, 1
ORDER_PREFIX_LEFT_RIGHT, ORDER_PREFIX_RIGHT_LEFT = 2, 3
ORDER_POSTFIX_LEFT_RIGHT, ORDER_POSTFIX_RIGHT_LEFT = 4, 5
class Node(object):
    """Internal tree node holding a key, a value, its AA level and children."""

    __slots__ = ['key', 'value', 'left', 'right', 'level']

    def __init__(self, key, value, level):
        # A fresh node starts without children.
        self.left = None
        self.right = None
        self.key = key
        self.value = value
        self.level = level

    def __getitem__(self, key):
        """Return the left child for index 0, the right child otherwise."""
        if key == 0:
            return self.left
        return self.right

    def __setitem__(self, key, value):
        """Assign the left child for index 0, the right child otherwise."""
        side = 'left' if key == 0 else 'right'
        setattr(self, side, value)

    def free(self):
        """Drop the child, key and value references (level is kept)."""
        self.left = self.right = None
        self.value = self.key = None

    def __repr__(self):
        fields = (type(self).__name__, self.key, self.value, self.level)
        return "%s(%r, %r, %r)" % fields

    def copy(self):
        """Return a new childless Node with the same key, value and level."""
        return Node(self.key, self.value, self.level)
class _AATree(object):
    r"""
    AATree implements a balanced Andersson tree.

    An AA tree in computer science is a form of balanced tree used for
    storing and retrieving ordered data efficiently. AA trees are named
    for Arne Andersson, their inventor.

    AA trees are a variation of the red-black tree, which in turn is an
    enhancement to the binary search tree. Unlike red-black trees, red
    nodes on an AA tree can only be added as a right subchild. In other
    words, no red node can be a left sub-child. This results in the
    simulation of a 2-3 tree instead of a 2-3-4 tree, which greatly
    simplifies the maintenance operations. The maintenance algorithms
    for a red-black tree need to consider seven different shapes to
    properly balance the tree:

        *         *       *     *         *         *   *
         \       /       / \     \       /         /     \
          *     *       *   *     *     *         *       *
                                 /       \       /         \
                                *         *     *           *

    An AA tree on the other hand only needs to consider two shapes due
    to the strict requirement that only right links can be red:

        *        *
         \        \
          *        *
                    \
                     *

    Whereas red-black trees require one bit of balancing metadata per
    node (the color), AA trees require O(log(N)) bits of metadata per
    node, in the form of an integer "level". The following invariants
    hold for AA trees:

    1. The level of every leaf node is one.
    2. The level of every left child is exactly one less than that
       of its parent.
    3. The level of every right child is equal to or one less than
       that of its parent.
    4. The level of every right grandchild is strictly less than
       that of its grandparent.
    5. Every node of level greater than one has two children.

    A link where the child's level is equal to that of its parent is
    called a horizontal link, and is analogous to a red link in the
    red-black tree. Individual right horizontal links are allowed, but
    consecutive ones are forbidden; all left horizontal links are
    forbidden. These are more restrictive constraints than the
    analogous ones on red-black trees, with the result that
    re-balancing an AA tree is procedurally much simpler than
    re-balancing a red-black tree.

    see: http://en.wikipedia.org/wiki/Andersson_tree
         http://user.it.uu.se/~arnea/abs/simp.html
         http://eternallyconfuzzled.com/tuts/datastructures/jsw_tut_andersson.aspx
    """

    def __init__(self, items=None):
        """ AATree() -> new empty tree.
            AATree(mapping,) -> new tree initialized from a mapping
            AATree(seq) -> new tree initialized from seq
                           [(k1, v1), (k2, v2), ... (kn, vn)]
        """
        self._root = None
        self._count = 0
        # store keys in order of tree creation - preserve for copy, repr, ...
        self._keys = []
        if items is not None:
            self.update(items)

    def update(self, *args):
        """ Update tree with items from each mapping or seq
            [(k1, v1), (k2, v2), ... (kn, vn)] given in args.
        """
        for items in args:
            try:
                # mapping: anything exposing items()
                gen = items.items()
            except AttributeError:
                # plain iterable of (key, value) pairs
                gen = iter(items)
            for k, v in gen:
                self.insert(k, v)

    def _new_node(self, key, value):
        """ create a leaf node (level 1) for key/value """
        return Node(key, value, 1)

    def root(self):
        """ return root node (None for an empty tree) """
        return self._root

    def _find_node(self, key):
        """ return the node holding key, or None if key is not stored.

        Uses the same '<' / '>' ordering tests as insert, so a key that
        is neither smaller nor greater than a node's key matches it.
        """
        node = self._root
        while node is not None:
            if key < node.key:
                node = node.left
            elif key > node.key:
                node = node.right
            else:
                return node
        return None

    def _skew(self, node):
        r""" remove a left horizontal link by rotating right

                 |                      |
            L <- T        ==>>          L -> T
           / \    \                    /    / \
          A   B    R                  A    B   R

        Returns the node that is now the root of this subtree.
        """
        if node is None:
            return None
        elif node.left is None:
            return node
        elif node.left.level == node.level:
            # swap the pointers of horizontal left links
            lnode = node.left
            node.left = lnode.right
            lnode.right = node
            return lnode
        else:
            return node

    def _split(self, node):
        r""" remove two consecutive right horizontal links by rotating left

            |                            |
            T -> R -> X     ==>>         R
           /    /                       / \
          A    B                       T   X
                                      / \
                                     A   B

        Returns the node that is now the root of this subtree.
        """
        if node is None:
            return None
        elif node.right is None or node.right.right is None:
            return node
        elif node.level == node.right.right.level:
            # two horizontal right links: take the middle node,
            # elevate it and return it
            rnode = node.right
            node.right = rnode.left
            rnode.left = node
            rnode.level += 1
            return rnode
        else:
            return node

    def insert(self, key, value):
        """ insert item into tree, if key exists change value """
        def _insert(node, key, value):
            # do the normal binary tree insertion
            if node is None:
                return self._new_node(key, value)
            elif key < node.key:
                node.left = _insert(node.left, key, value)
            elif key > node.key:
                node.right = _insert(node.right, key, value)
            else:
                node.value = value
            # perform skew and split - whether or not a rotation
            # will occur is determined inside skew and split
            node = self._skew(node)
            node = self._split(node)
            return node

        # Decide "new key or replacement" with a tree lookup instead of
        # scanning the insertion-order list, and only touch the
        # bookkeeping once the insertion itself has succeeded, so a
        # failing key comparison cannot leave the counters out of sync.
        is_new = self._find_node(key) is None
        self._root = _insert(self._root, key, value)
        if is_new:
            self._keys.append(key)
            self._count += 1

    def remove(self, key):
        """ remove item from tree; a key that is not stored is ignored """
        def _remove(t, key):
            if t is not None:
                # 'last' ends up as the deepest node on the search path,
                # 'deleted' as the deepest node whose key is <= key.
                _remove.last = t
                if key < t.key:
                    t.left = _remove(t.left, key)
                else:
                    _remove.deleted = t
                    t.right = _remove(t.right, key)
                if t is _remove.last and _remove.deleted is not None and \
                        key == _remove.deleted.key:
                    # at the bottom of the path and the key was found:
                    # move this node's payload into the matching node and
                    # unlink this node instead
                    _remove.deleted.key = t.key
                    _remove.deleted.value = t.value
                    _remove.deleted = None
                    t = t.right
                    _remove.found = _remove.last
                else:
                    # on the way back up: restore the level invariants
                    left_level = 0 if t.left is None else t.left.level
                    right_level = 0 if t.right is None else t.right.level
                    if left_level < t.level - 1 or right_level < t.level - 1:
                        t.level -= 1
                        if right_level > t.level:
                            t.right.level = t.level
                        t = self._skew(t)
                        if t.right is not None:
                            t.right = self._skew(t.right)
                            if t.right.right is not None:
                                t.right.right = self._skew(t.right.right)
                        t = self._split(t)
                        if t.right is not None:
                            t.right = self._split(t.right)
            return t

        # State shared across the recursion. All three attributes must
        # exist up front: a search that only ever descends left never
        # assigns 'deleted', and reading it would raise AttributeError.
        _remove.found = None
        _remove.deleted = None
        _remove.last = None
        if self._root is None:
            return
        self._root = _remove(self._root, key)
        if _remove.found is not None:
            _remove.found.free()
            self._keys.remove(key)
            self._count -= 1

    def clear(self):
        """ empty tree """
        def _clear(node):
            if node is not None:
                _clear(node.left)
                _clear(node.right)
                node.free()

        _clear(self._root)
        self._root = None
        self._count = 0
        self._keys = []

    def remove_items(self, keys):
        """ remove items with keys in keys """
        for key in keys:
            self.remove(key)

    def get(self, key):
        """ return value for key, None if key is not in the tree """
        node = self._find_node(key)
        if node is None:
            return None
        return node.value

    def __getitem__(self, x):
        # same contract as get(): a missing key yields None, not KeyError
        return self.get(x)

    def __delitem__(self, x):
        self.remove(x)

    def __setitem__(self, key, value):
        self.insert(key, value)

    def __contains__(self, x):
        # test for the node itself so that keys stored with a None value
        # are still reported as present
        return self._find_node(x) is not None

    def is_empty(self):
        """ return True if the tree holds no items """
        return self._root is None

    def traverse(self, func, order=ORDER_INFIX_LEFT_RIGHT):
        """ traverse tree with defined order,
            execute func for each node

            order is one of the ORDER_* constants; any other value
            visits nothing.
        """
        def _traverse(node, func, order):
            if node is not None:
                if order == ORDER_INFIX_LEFT_RIGHT:
                    _traverse(node.left, func, order)
                    func(node)
                    _traverse(node.right, func, order)
                elif order == ORDER_INFIX_RIGHT_LEFT:
                    _traverse(node.right, func, order)
                    func(node)
                    _traverse(node.left, func, order)
                elif order == ORDER_PREFIX_LEFT_RIGHT:
                    func(node)
                    _traverse(node.left, func, order)
                    _traverse(node.right, func, order)
                elif order == ORDER_PREFIX_RIGHT_LEFT:
                    func(node)
                    _traverse(node.right, func, order)
                    _traverse(node.left, func, order)
                elif order == ORDER_POSTFIX_LEFT_RIGHT:
                    _traverse(node.left, func, order)
                    _traverse(node.right, func, order)
                    func(node)
                elif order == ORDER_POSTFIX_RIGHT_LEFT:
                    _traverse(node.right, func, order)
                    _traverse(node.left, func, order)
                    func(node)

        _traverse(self._root, func, order)

    def keys(self, reverse=False):
        """ return generator over keys in sorted (or reversed) order """
        return (x[0] for x in self.iter_items(reverse=reverse))

    __iter__ = keys

    def __reversed__(self):
        return self.keys(reverse=True)

    def values(self, reverse=False):
        """ return generator over values, ordered by key """
        return (x[1] for x in self.iter_items(reverse=reverse))

    def copy(self):
        """ shallow copy of tree - tree structure, i.e. key insertion
            order is preserved
        """
        copytree = self.__class__()
        # re-inserting in the original insertion order rebuilds the
        # same tree shape
        for k in self._keys:
            copytree[k] = self.get(k)
        return copytree

    __copy__ = copy

    def __repr__(self):
        selfname = self.__class__.__name__
        gen = ("(%r, %r)" % (k, self.get(k)) for k in self._keys)
        items = ", ".join(gen)
        return "%s([%s])" % (selfname, items)

    def __len__(self):
        return self._count

    def iter_items(self, reverse=False):
        """ generator over (key, value) items, ordered by key
            (descending when reverse is true)
        """
        # No explicit StopIteration for the empty tree: raising it inside
        # a generator is a RuntimeError under PEP 479. An empty tree
        # simply yields nothing.
        def _iter_items(node, reverse):
            if node is not None:
                if reverse:
                    n1 = node.right
                    n2 = node.left
                else:
                    n1 = node.left
                    n2 = node.right
                for item in _iter_items(n1, reverse):
                    yield item
                yield node.key, node.value
                for item in _iter_items(n2, reverse):
                    yield item

        for item in _iter_items(self.root(), reverse):
            yield item

    def dump(self, order=ORDER_INFIX_LEFT_RIGHT):
        """ print one line per node in the given traversal order,
            indented by how far the node's level is below the root's,
            followed by repr of the tree
        """
        if self._root is not None:
            max_level = self._root.level
        else:
            max_level = 0

        def _dump(node):
            if node is not None:
                level = max_level - node.level
                print("%s(%r, %r)" % ('--' * level, node.key, node.value))

        self.traverse(_dump, order)
        print(repr(self))
class AnderssonTree(_AATree):
    """Public name for the Andersson tree; adds nothing to _AATree."""
class AATree(_AATree):
    """Short public name for the Andersson tree; adds nothing to _AATree."""

26
setup.py Normal file
View file

@ -0,0 +1,26 @@
import os
from setuptools import setup
def read(fname):
    """Return the contents of *fname*, resolved relative to this file.

    The file is opened in a ``with`` block so the handle is closed as
    soon as the text has been read instead of being left to the garbage
    collector.
    """
    path = os.path.join(os.path.dirname(__file__), fname)
    with open(path) as handle:
        return handle.read()
# Repository location, used for both the home page and the download link.
PROJECT_URL = 'https://github.com/darko-poljak/anderssontree'

# Trove classifiers published with the package metadata.
CLASSIFIERS = [
    "Development Status :: 4 - Beta",
    "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
    "Operating System :: OS Independent",
    "Topic :: Software Development :: Libraries :: Python Modules",
]

# Package metadata; the long description is taken from README.rst.
setup(
    name='anderssontree',
    version='0.1.0',
    description='Package provides Andersson Tree implementation in pure Python.',
    long_description=read('README.rst'),
    author='Darko Poljak',
    author_email='darko.poljak@gmail.com',
    license="GPLv3",
    keywords=['AA Tree', 'Andersson Tree'],
    url=PROJECT_URL,
    download_url=PROJECT_URL,
    packages=['anderssontree'],
    platforms="OS Independent",
    classifiers=CLASSIFIERS,
)

0
tests/__init__.py Normal file
View file

309
tests/test_aatree.py Normal file
View file

@ -0,0 +1,309 @@
#!/usr/bin/env python
# Author: Darko Poljak <darko.poljak@gmail.com>
# License: GPLv3
import unittest
import pickle
from random import randint, shuffle
from anderssontree import AnderssonTree
class AATreeException(Exception):
    """Raised by the invariant checker when an AA tree property is violated."""
def test_items_seq(maxval=200):
    """Return the fixture list [(0, 0), (1, 1), ..., (maxval-1, maxval-1)]."""
    return [(i, i) for i in range(0, maxval)]
def test_items_dict(maxval=200):
    """Return the fixture dict mapping each of 0..maxval-1 to itself."""
    numbers = range(0, maxval)
    return dict(zip(numbers, numbers))
class TestAATree(unittest.TestCase):
    """Unit tests for AnderssonTree: construction, access, iteration,
    removal, and the AA tree level invariants."""

    def test_init_empty(self):
        tree = AnderssonTree()
        self.assertEqual(len(tree), 0)

    def test_init_dict(self):
        d = test_items_dict()
        tree = AnderssonTree(d)
        self.assertEqual(len(tree), len(d))
        self.check_aatree_properties(tree)

    def test_init_seq(self):
        s = test_items_seq()
        tree = AnderssonTree(s)
        self.assertEqual(len(tree), len(s))
        self.check_aatree_properties(tree)

    def test_pickle_protocol(self):
        s = test_items_seq()
        tree1 = AnderssonTree(s)
        # -1 selects the highest pickle protocol available
        pickle_str = pickle.dumps(tree1, -1)
        tree2 = pickle.loads(pickle_str)
        self.assertEqual(len(tree1), len(tree2))
        self.assertEqual(list(tree1.keys()), list(tree2.keys()))
        self.assertEqual(list(tree1.values()), list(tree2.values()))
        self.check_aatree_properties(tree1)
        self.check_aatree_properties(tree2)

    def test_max_btree_level(self):
        def _map_levels(node, level, map_):
            # record the depth (root == 0) of every key
            if node is not None:
                map_[node.key] = level
                next_level = level + 1
                _map_levels(node.left, next_level, map_)
                _map_levels(node.right, next_level, map_)

        def _map_bin_tree_levels(root):
            """ level is binary tree level, not AATree level """
            map_ = {}
            _map_levels(root, 0, map_)
            max_level = max(map_.values())
            # flip depths so the deepest nodes get level 1
            for k in list(map_):
                map_[k] = max_level - map_[k] + 1
            return map_

        keys = range(1, 14)
        tree = AnderssonTree(zip(keys, keys))
        mapping = _map_bin_tree_levels(tree.root())
        maxl = max(mapping.values())
        self.assertEqual(maxl, 5, 'Invalid max level %s!' % maxl)

    def check_aatree_properties(self, tree, dump=True):
        """
        Fail the test if the tree violates one of these properties:

        The level of every leaf node is one.
        The level of every left child is exactly one less than that of
        its parent.
        The level of every right child is equal to or one less than
        that of its parent.
        The level of every right grandchild is strictly less than that
        of its grandparent.
        Every node of level greater than one has two children.

        When dump is true the tree is printed before failing.
        """
        def _check_child_level(node, maxlevel):
            # every node in this subtree must be below maxlevel
            if node is not None:
                if node.level >= maxlevel:
                    msg = 'grandchild level >= grandparent level ' \
                          'at %s' % str(node.key)
                    raise AATreeException(msg)
                _check_child_level(node.left, maxlevel)
                _check_child_level(node.right, maxlevel)

        def _check_properties(node, parent, grandparent):
            if node is not None:
                if node.left is None and node.right is None:
                    if node.level != 1:
                        # str() keeps the message valid for tuple keys
                        raise AATreeException('leaf level is not 1 at '
                                              '%s' % str(node.key))
                # NOTE(review): the 'parent is not None' guards mean the
                # root's own child links are not checked here - confirm
                # whether that is intended.
                if parent is not None and node.left is not None:
                    if node.left.level != node.level - 1:
                        raise AATreeException(
                            'left child level not exactly'
                            ' one less than that of its parent at '
                            '%s' % str(node.key))
                if parent is not None and node.right is not None:
                    if not (node.right.level == node.level or
                            node.right.level == node.level - 1):
                        raise AATreeException(
                            'right child level is'
                            ' not equal to or one less than of its'
                            ' parent at %s' % str(node.key))
                if grandparent is not None:
                    maxl = grandparent.level
                    x = grandparent.right
                    if x:
                        _check_child_level(x.right, maxl)
                        _check_child_level(x.left, maxl)
                if node.level > 1:
                    if node.left is None or node.right is None:
                        raise AATreeException(
                            'node with level > 1'
                            ' does not have two children at'
                            ' %s' % str(node.key))
                _check_properties(node.left, node, parent)
                _check_properties(node.right, node, parent)

        try:
            _check_properties(tree._root, None, None)
        except AATreeException as e:
            if dump:
                print('')
                tree.dump()
            self.fail(str(e) + " - see tree dump")

    def test_update(self):
        d = test_items_seq()
        tree = AnderssonTree()
        tree.update(d)
        self.assertEqual(len(tree), len(d))
        self.assertEqual(set(tree.keys()), set(x[0] for x in d))
        self.assertEqual(set(tree.values()), set(x[1] for x in d))
        self.check_aatree_properties(tree)

    def test_root(self):
        x = [5, 6, 4]
        tree = AnderssonTree(zip(x, x))
        self.assertEqual(tree.root().key, 5)

    def test_insert(self):
        s = test_items_seq()
        tree = AnderssonTree()
        size = len(tree)
        keys = [x[0] for x in s]
        keyset = set()
        shuffle(keys)
        for k in keys:
            keyset.add(k)
            tree.insert(k, k)
            size += 1
            self.assertEqual(len(tree), size)
            self.assertEqual(set(tree.keys()), keyset)
            self.check_aatree_properties(tree)

    def test_clear(self):
        s = test_items_seq()
        tree = AnderssonTree(s)
        self.check_aatree_properties(tree)
        tree.clear()
        self.assertEqual(len(tree), 0)

    def test_get(self):
        s = test_items_seq()
        tree = AnderssonTree(s)
        self.check_aatree_properties(tree)
        keys = [x[0] for x in s]
        shuffle(keys)
        for k in keys:
            self.assertEqual(tree.get(k), k)
            self.assertEqual(tree[k], k)
        # a missing key yields None rather than raising
        max_ = max(keys)
        self.assertEqual(tree[max_ + 1], None)

    def test_setitem(self):
        s = test_items_seq()
        tree = AnderssonTree(s)
        self.check_aatree_properties(tree)
        keys = [x[0] for x in s]
        shuffle(keys)
        for k in keys:
            val = k * 10
            tree[k] = val
            self.assertEqual(tree.get(k), val)

    def test_contains(self):
        s = test_items_seq()
        tree = AnderssonTree(s)
        self.check_aatree_properties(tree)
        keys = [x[0] for x in s]
        shuffle(keys)
        for k in keys:
            self.assertTrue(k in tree)
        self.assertFalse(max(keys) + 10 in tree)

    def test_is_empty(self):
        s = test_items_seq()
        tree = AnderssonTree()
        self.assertTrue(tree.is_empty())
        tree.update(s)
        self.assertFalse(tree.is_empty())
        tree.clear()
        self.assertTrue(tree.is_empty())

    def test_keys(self):
        s = test_items_seq()
        tree = AnderssonTree(s)
        keys = set(x[0] for x in s)
        self.assertEqual(set(tree.keys()), keys)

    def test_values(self):
        s = test_items_seq()
        tree = AnderssonTree(s)
        values = set(x[1] for x in s)
        self.assertEqual(set(tree.values()), values)

    def test_reversed(self):
        s = test_items_seq()
        tree = AnderssonTree(s)
        keys = list(reversed([x[0] for x in s]))
        self.assertEqual(list(reversed(tree)), keys)

    def test_copy(self):
        s = test_items_seq()
        tree1 = AnderssonTree(s)
        tree2 = tree1.copy()
        self.assertEqual(len(tree1), len(tree2))
        self.assertEqual(repr(tree1), repr(tree2))

    def test_repr(self):
        # Use the fixture list itself (not a list wrapping it) so that
        # repr is checked against many shuffled keys, not one tuple key.
        s = test_items_seq()
        keys = [x[0] for x in s]
        shuffle(keys)
        s = zip(keys, keys)
        tree = AnderssonTree(s)
        # repr lists items in insertion order
        gen = ("(%r, %r)" % (x, x) for x in keys)
        spam = ", ".join(gen)
        spam = "%s([%s])" % (tree.__class__.__name__, spam)
        self.assertEqual(repr(tree), spam)

    def test_iter_items(self):
        s = test_items_seq()
        tree = AnderssonTree(s)
        items = [x for x in tree.iter_items()]
        self.assertEqual(items, sorted(s))

    def test_iter_items_reversed(self):
        s = test_items_seq()
        tree = AnderssonTree(s)
        items = [x for x in tree.iter_items(reverse=True)]
        self.assertEqual(items, sorted(s, reverse=True))

    def test_traverse(self):
        s = test_items_seq()
        tree = AnderssonTree(s)
        visited = set()

        def visit(node):
            visited.add(node.key)

        tree.traverse(visit)
        keys = set([x[0] for x in s])
        self.assertEqual(keys, visited)

    def test_remove(self):
        # repeat with fresh random removal orders
        for i in range(10):
            s = test_items_seq(100)
            tree = AnderssonTree(s)
            self.check_aatree_properties(tree)
            size = len(tree)
            keys = [x[0] for x in s]
            keyset = set(keys)
            shuffle(keys)
            for k in keys:
                keyset.remove(k)
                tree.remove(k)
                size -= 1
                self.assertEqual(len(tree), size)
                self.assertEqual(set(tree.keys()), keyset)
                self.check_aatree_properties(tree, dump=False)

    def test_remove_root(self):
        s = test_items_seq(1000)
        tree = AnderssonTree(s)
        self.check_aatree_properties(tree)
        size = len(tree)
        keys = [x[0] for x in s]
        keyset = set(keys)
        # always delete whatever key currently sits at the root
        while len(keyset) > 0:
            k = tree.root().key
            keyset.remove(k)
            tree.remove(k)
            size -= 1
            self.assertEqual(len(tree), size)
            self.assertEqual(set(tree.keys()), keyset)
            self.check_aatree_properties(tree)
# Run the test suite when this module is executed as a script.
if __name__ == "__main__":
    unittest.main()