srcs/toolbox/bst.c

00001 /* 
00002  * Copyright (c) 2005-2012 by KoanLogic s.r.l.
00003  */
00004 
00005 #include <toolbox/bst.h>
00006 #include <toolbox/carpal.h>
00007 #include <toolbox/memory.h>
00008 #include <toolbox/misc.h>
00009 
00010 struct u_bst_node_s
00011 {
00012     void *key, *val;                    /* node's key and value. */
00013     size_t nelem;                       /* number of elements in the subtrees
00014                                            rooted in this node. */
00015     struct u_bst_node_s *left, *right;  /* left and right subtrees. */
00016 };
00017 
00018 struct u_bst_s
00019 {
00020     int opts;
00021     int (*cmp) (const void *, const void *);    /* key compare function. */
00022     u_bst_type_t keytype, valtype;              /* nature of keys and values. */
00023     size_t keysize, valsize;                    /* size of keys and values. */
00024     void (*keyfree) (void *);                   /* key dtor. */
00025     void (*valfree) (void *);                   /* value dtor. */
00026     struct u_bst_node_s *root;                  /* parent node reference. */
00027 };
00028 
00029 static int u_bst_keycmp (const void *a, const void *b);
00030 static void u_bst_genfree (void *p);
00031 static int u_bst_node_new (u_bst_t *bst, const void *key, const void *val, 
00032         u_bst_node_t **pnode);
00033 static void u_bst_node_free (u_bst_t *bst, u_bst_node_t *node);
00034 static void u_bst_node_do_free (u_bst_t *bst, u_bst_node_t *node);
00035 static int u_bst_assign (void **pdst, const void *src, u_bst_type_t t, 
00036         size_t sz);
00037 static u_bst_node_t *u_bst_node_search (u_bst_node_t *node, const void *key, 
00038         int (*cmp)(const void *, const void *));
00039 static void u_bst_node_foreach (u_bst_node_t *node, 
00040         void (*cb)(u_bst_node_t *, void *), void *cb_args);
00041 static u_bst_node_t *u_bst_node_push_top (u_bst_t *bst, u_bst_node_t *node, 
00042         const void *key, const void *val);
00043 static u_bst_node_t *u_bst_node_push_bottom (u_bst_t *bst, const void *key, 
00044         const void *val);
00045 static u_bst_node_t *u_bst_node_push_rand (u_bst_t *bst, u_bst_node_t *node,
00046         const void *key, const void *val);
00047 static u_bst_node_t *u_bst_node_push (u_bst_t *bst, u_bst_node_t *node, 
00048         const void *key, const void *val);
00049 static u_bst_node_t *u_bst_node_find_nth (u_bst_node_t *node, size_t n);
00050 static u_bst_node_t *u_bst_node_promote_nth (u_bst_node_t *node, size_t n);
00051 static u_bst_node_t *u_bst_node_join_lr (u_bst_node_t *l, u_bst_node_t *r);
00052 static u_bst_node_t *u_bst_node_delete (u_bst_t *bst, u_bst_node_t *node, 
00053         const void *key, int *pfound);
00054 static u_bst_node_t *u_bst_node_balance (u_bst_node_t *node);
00055 
00056 #ifdef BST_DEBUG
00057 static int u_bst_keycmp_dbg (const void *a, const void *b);
00058 #endif  /* BST_DEBUG */
00059 
00136 int u_bst_new (int opts, u_bst_t **pbst)
00137 {
00138     u_bst_t *bst = NULL;
00139 
00140     dbg_return_if (pbst == NULL, ~0);
00141 
00142     warn_return_sif ((bst = u_malloc(sizeof *bst)) == NULL, ~0);
00143 
00144     bst->keytype = U_BST_TYPE_STRING;
00145     bst->keyfree = u_bst_genfree;
00146     bst->valtype = U_BST_TYPE_PTR;
00147     bst->valfree = NULL;
00148 #ifdef BST_DEBUG
00149     bst->cmp = u_bst_keycmp_dbg;
00150 #else
00151     bst->cmp = u_bst_keycmp;
00152 #endif  /* BST_DEBUG */
00153     bst->root = NULL;
00154     bst->opts = opts;
00155 
00156     /* Seed the PRNG in case we need to handle randomized insertion. */
00157     if (bst->opts & U_BST_OPT_RANDOMIZED)
00158         srand((unsigned int) getpid()); 
00159 
00160     *pbst = bst;
00161 
00162     return 0;
00163 }
00164 
00174 void u_bst_free (u_bst_t *bst)
00175 {
00176     if (bst)
00177     {
00178         u_bst_node_free(bst, bst->root);
00179         u_free(bst);
00180     }
00181 
00182     return;
00183 }
00184 
00201 int u_bst_push (u_bst_t *bst, const void *key, const void *val)
00202 {
00203     dbg_return_if (bst == NULL, ~0);
00204     dbg_return_if (key == NULL, ~0);
00205 
00206     if (bst->opts & U_BST_OPT_RANDOMIZED)
00207         bst->root = u_bst_node_push_rand(bst, bst->root, key, val);
00208     else if (bst->opts & U_BST_OPT_PUSH_TOP)
00209         bst->root = u_bst_node_push_top(bst, bst->root, key, val);
00210     else /* The default is bottom insertion. */
00211         bst->root = u_bst_node_push_bottom(bst, key, val);
00212 
00213     return bst->root ? 0 : ~0;
00214 }
00215 
00227 int u_bst_delete (u_bst_t *bst, const void *key)
00228 {
00229     int found = 0;
00230 
00231     dbg_return_if (bst == NULL, ~0);
00232     dbg_return_if (key == NULL, ~0);
00233 
00234     bst->root = u_bst_node_delete(bst, bst->root, key, &found);
00235 
00236     return found ? 0 : ~0;
00237 }
00238 
00251 u_bst_node_t *u_bst_search (u_bst_t *bst, const void *key)
00252 {
00253     dbg_return_if (bst == NULL, NULL);
00254     dbg_return_if (key == NULL, NULL);
00255 
00256     return u_bst_node_search(bst->root, key, bst->cmp);
00257 }
00258 
00271 u_bst_node_t *u_bst_find_nth (u_bst_t *bst, size_t n)
00272 {
00273     dbg_return_if (bst == NULL, NULL);
00274 
00275     return u_bst_node_find_nth(bst->root, n);
00276 }
00277 
00288 ssize_t u_bst_count (u_bst_t *bst)
00289 {
00290     dbg_return_if (bst == NULL, (ssize_t) -1);
00291 
00292     if (bst->root == NULL)
00293         return 0;
00294 
00295     return bst->root->nelem;
00296 }
00297 
00312 int u_bst_foreach (u_bst_t *bst, void (*cb)(u_bst_node_t *, void *), 
00313         void *cb_args)
00314 {
00315     dbg_return_if (bst == NULL, ~0);
00316     dbg_return_if (cb == NULL, ~0);
00317 
00318     u_bst_node_foreach(bst->root, cb, cb_args);
00319 
00320     return 0;
00321 }
00322 
00334 int u_bst_balance (u_bst_t *bst)
00335 {
00336     dbg_return_if (bst == NULL, ~0);
00337 
00338     bst->root = u_bst_node_balance(bst->root);
00339 
00340     return 0;
00341 }
00342 
00355 u_bst_node_t *u_bst_rotate (u_bst_node_t *pivot, u_bst_rot_t dir)
00356 {
00357     u_bst_node_t *newroot;    /* It will become the new parent node. */
00358 
00359     dbg_return_if (pivot == NULL, NULL);
00360 
00361     switch (dir)
00362     {
00363         /* Promote the right child. */
00364         case U_BST_ROT_LEFT:
00365             newroot = pivot->right;
00366             pivot->right = newroot->left;
00367             newroot->left = pivot;
00368             /* Update child nodes' counters. */
00369             newroot->nelem = pivot->nelem;
00370             pivot->nelem -= (newroot->right ? newroot->right->nelem + 1 : 1);
00371             break;
00372 
00373         /* Promote the left child. */
00374         case U_BST_ROT_RIGHT:
00375             newroot = pivot->left;
00376             pivot->left = newroot->right;
00377             newroot->right = pivot;
00378             /* Update child nodes' counters. */
00379             newroot->nelem = pivot->nelem;
00380             pivot->nelem -= (newroot->left ? newroot->left->nelem + 1 : 1);
00381             break;
00382     }
00383 
00384     return newroot;
00385 }
00386 
00400 int u_bst_set_keyattr (u_bst_t *bst, u_bst_type_t kt, size_t ks)
00401 {
00402     dbg_return_if (bst == NULL, ~0);
00403     dbg_return_if (kt == U_BST_TYPE_OPAQUE && ks == 0, ~0);
00404 
00405     bst->keytype = kt;
00406     bst->keysize = ks;
00407 
00408     if (kt == U_BST_TYPE_OPAQUE)
00409         bst->keyfree = u_bst_genfree;
00410 
00411     return 0;
00412 }
00413 
00427 int u_bst_set_valattr (u_bst_t *bst, u_bst_type_t vt, size_t vs)
00428 {
00429     dbg_return_if (bst == NULL, ~0);
00430     dbg_return_if (vt == U_BST_TYPE_OPAQUE && vs == 0, ~0);
00431 
00432     bst->valtype = vt;
00433     bst->valsize = vs;
00434 
00435     if (vt == U_BST_TYPE_OPAQUE)
00436         bst->valfree = u_bst_genfree;
00437 
00438     return 0;
00439 }
00440 
00452 int u_bst_set_cmp (u_bst_t *bst, int (*f)(const void *, const void *))
00453 {
00454     dbg_return_if (bst == NULL, ~0);
00455     dbg_return_if (f == NULL, ~0);
00456 
00457     bst->cmp = f;
00458 
00459     return 0;
00460 }
00461 
00473 int u_bst_set_keyfree (u_bst_t *bst, void (*f)(void *))
00474 {
00475     dbg_return_if (bst == NULL, ~0);
00476     dbg_return_if (f == NULL, ~0);
00477 
00478     bst->keyfree = f;
00479 
00480     return 0;
00481 }
00482 
00494 int u_bst_set_valfree (u_bst_t *bst, void (*f)(void *))
00495 {
00496     dbg_return_if (bst == NULL, ~0);
00497     dbg_return_if (f == NULL, ~0);
00498 
00499     bst->valfree = f;
00500 
00501     return 0;
00502 }
00503 
00513 const void *u_bst_node_key (u_bst_node_t *node)
00514 {
00515     dbg_return_if (node == NULL, NULL);
00516 
00517     return node->key;
00518 }
00519 
00529 const void *u_bst_node_val (u_bst_node_t *node)
00530 {
00531     dbg_return_if (node == NULL, NULL);
00532 
00533     return node->val;
00534 }
00535 
00545 ssize_t u_bst_node_count (u_bst_node_t *node)
00546 {
00547     dbg_return_if (node == NULL, -1);
00548 
00549     return node->nelem;
00550 }
00551 
00561 int u_bst_empty (u_bst_t *bst)
00562 {
00563     dbg_return_if (bst == NULL, 1); 
00564 
00565     return (bst->root == NULL);
00566 }
00567 
00572 static int u_bst_node_new (u_bst_t *bst, const void *key, const void *val, 
00573         u_bst_node_t **pnode)
00574 {
00575     u_bst_node_t *node = NULL;
00576 
00577     dbg_return_if (bst == NULL, ~0);
00578     dbg_return_if (pnode == NULL, ~0);
00579     dbg_return_if (key == NULL, ~0);
00580 
00581     warn_err_sif ((node = u_zalloc(sizeof *node)) == NULL);
00582 
00583     warn_err_if (u_bst_assign(&node->key, key, bst->keytype, bst->keysize));
00584     warn_err_if (u_bst_assign(&node->val, val, bst->valtype, bst->valsize));
00585 
00586     node->right = node->left = NULL;
00587     node->nelem = 1;    /* Count itself. */
00588 
00589     *pnode = node;
00590 
00591     return 0;
00592 err:
00593     u_bst_node_do_free(bst, node);
00594     return ~0;
00595 }
00596 
00597 static void u_bst_node_free (u_bst_t *bst, u_bst_node_t *node)
00598 {
00599     if (node == NULL)
00600         return;
00601 
00602     /* Recursively free all child nodes (post-order). */
00603     u_bst_node_free(bst, node->left);
00604     u_bst_node_free(bst, node->right);
00605     u_bst_node_do_free(bst, node);
00606 
00607     return;
00608 }
00609 
00610 static void u_bst_node_do_free (u_bst_t *bst, u_bst_node_t *node)
00611 {
00612     warn_return_if (bst == NULL, );
00613 
00614     if (node == NULL)
00615         return;
00616 
00617     if (bst->keyfree && node->key) 
00618         bst->keyfree(node->key);
00619 
00620     if (bst->valfree && node->val) 
00621         bst->valfree(node->val);
00622 
00623     u_free(node);
00624 
00625     return;
00626 }
00627 
00628 static u_bst_node_t *u_bst_node_search (u_bst_node_t *node, const void *key, 
00629         int (*cmp)(const void *, const void *))
00630 {
00631     int rc;
00632 
00633     dbg_return_if (cmp == NULL, NULL);
00634     dbg_return_if (key == NULL, NULL);
00635 
00636     /* We've reached an external node: the quest ends here. */
00637     if (node == NULL)
00638         return NULL;
00639 
00640     /* If keys match, we're done. */
00641     if ((rc = cmp(key, node->key)) == 0)
00642         return node;
00643 
00644     /* Searched key is greater: recur into the right subtree. */
00645     if (rc > 0)
00646         return u_bst_node_search(node->right, key, cmp);
00647 
00648     /* The searched key is smaller, go left. */
00649     return u_bst_node_search(node->left, key, cmp);
00650 }
00651 
00652 static int u_bst_assign (void **pdst, const void *src, u_bst_type_t t, 
00653         size_t sz)
00654 {
00655     dbg_return_if (pdst == NULL, ~0);
00656 
00657     switch (t)
00658     {
00659         case U_BST_TYPE_STRING:
00660             warn_err_sif (src && (*pdst = u_strdup(src)) == NULL);
00661             break;
00662         case U_BST_TYPE_OPAQUE:
00663             if (src)
00664             {
00665                 warn_err_ifm (sz == 0, "0-len opaque type !");
00666                 warn_err_sif ((*pdst = u_malloc(sz)) == NULL);
00667                 memcpy(*pdst, src, sz);
00668             }
00669             break;
00670         case U_BST_TYPE_PTR:
00671             memcpy(pdst, &src, sizeof(void **));
00672             break;
00673     }
00674 
00675     return 0;
00676 err:
00677     return ~0;
00678 }
00679 
00680 /* Do in-order tree traversal.  Note that this provides a "natural" sort 
00681  * of BST elements. 
00682  * TODO: give a way to break the walk ? */
00683 static void u_bst_node_foreach (u_bst_node_t *node, 
00684         void (*cb)(u_bst_node_t *, void *), void *cb_args)
00685 {
00686     if (node == NULL)
00687         return;
00688 
00689     u_bst_node_foreach(node->left, cb, cb_args);
00690     cb(node, cb_args);
00691     u_bst_node_foreach(node->right, cb, cb_args);
00692 
00693     return;
00694 }
00695 
00696 static u_bst_node_t *u_bst_node_push_rand (u_bst_t *bst, u_bst_node_t *node, 
00697         const void *key, const void *val)
00698 {
00699     dbg_return_if (bst == NULL, NULL);
00700     dbg_return_if (key == NULL, NULL);
00701 
00702     if (node == NULL)
00703     {
00704         warn_err_if (u_bst_node_new(bst, key, val, &node));
00705         return node;
00706     }
00707 
00708     /* The new node is inserted on the top with 1/nelem+1 probability. */
00709     if ((size_t) rand() < RAND_MAX / (node->nelem + 1))
00710         return u_bst_node_push_top(bst, node, key, val);
00711 
00712     if (bst->cmp(key, node->key) < 0)
00713         node->left = u_bst_node_push(bst, node->left, key, val);
00714     else
00715         node->right = u_bst_node_push(bst, node->right, key, val);
00716 
00717     node->nelem += 1;
00718 
00719     return node;
00720 err:
00721     return NULL;
00722 }
00723 
00724 static u_bst_node_t *u_bst_node_push (u_bst_t *bst, u_bst_node_t *node, 
00725         const void *key, const void *val)
00726 {
00727     dbg_return_if (bst == NULL, NULL);
00728     dbg_return_if (key == NULL, NULL);
00729 
00730     if (node == NULL)
00731     {
00732         warn_err_if (u_bst_node_new(bst, key, val, &node));
00733         return node;
00734     }
00735 
00736     if (bst->cmp(key, node->key) < 0)
00737         node->left = u_bst_node_push(bst, node->left, key, val);
00738     else
00739         node->right = u_bst_node_push(bst, node->right, key, val);
00740 
00741     node->nelem += 1;
00742 
00743     return node;
00744 err:
00745     return NULL;
00746 }
00747  
00748 static u_bst_node_t *u_bst_node_push_bottom (u_bst_t *bst, const void *key, 
00749         const void *val)
00750 {
00751     u_bst_node_t *parent, *node;
00752 
00753     dbg_return_if (bst == NULL, NULL);
00754     dbg_return_if (key == NULL, NULL);
00755 
00756     /* Empty BST, handle special case. */
00757     if (u_bst_empty(bst))
00758     {
00759         warn_err_if (u_bst_node_new(bst, key, val, &bst->root));
00760         return bst->root;
00761     }
00762 
00763     /* Find a way through the non-empty BST until we reach an external node. */
00764     for (node = bst->root;
00765             node != NULL;
00766             node = (bst->cmp(key, node->key) < 0) ? node->left : node->right)
00767     {
00768         /* Save parent reference.  It is needed later, on loop exhaustion, to 
00769          * fix left/child links of the node to which we attach. */
00770         parent = node;
00771 
00772         /* Increment children counter of each node while we traverse. */
00773         node->nelem += 1;
00774     }
00775 
00776     /* Create a new node with the supplied (key, val). */
00777     warn_err_if (u_bst_node_new(bst, key, val, &node));
00778 
00779     /* Choose the subtree to which it belongs. */
00780     if (bst->cmp(key, parent->key) < 0)
00781         parent->left = node;
00782     else
00783         parent->right = node;
00784 
00785     return bst->root;
00786 err:
00787     return NULL;
00788 }
00789 
00790 static u_bst_node_t *u_bst_node_push_top (u_bst_t *bst, u_bst_node_t *node, 
00791         const void *key, const void *val)
00792 {
00793     u_bst_node_t *newnode = NULL;
00794 
00795     dbg_return_if (bst == NULL, NULL);
00796     dbg_return_if (key == NULL, NULL);
00797 
00798     /* External node reached: create the node. */
00799     if (node == NULL)
00800     {
00801         warn_err_if (u_bst_node_new(bst, key, val, &newnode));
00802         return newnode;
00803     }
00804 
00805     /* Update child counter of the traversed node. */
00806     node->nelem += 1;
00807 
00808     /* Let the created node bubble up through subsequent rotations. */
00809     if (bst->cmp(key, node->key) < 0)
00810     {
00811         node->left = u_bst_node_push_top(bst, node->left, key, val);
00812         node = u_bst_rotate(node, U_BST_ROT_RIGHT);
00813     }
00814     else
00815     {
00816         node->right = u_bst_node_push_top(bst, node->right, key, val);
00817         node = u_bst_rotate(node, U_BST_ROT_LEFT);
00818     }
00819     
00820     return node;
00821 err:
00822     return NULL;
00823 }
00824 
00825 static u_bst_node_t *u_bst_node_promote_nth (u_bst_node_t *node, size_t n)
00826 {
00827     size_t t;
00828     
00829     dbg_return_if (node == NULL, NULL);
00830 
00831     t = (node->left == NULL) ? 0 : node->left->nelem;
00832 
00833     if (t > n)
00834     {
00835         node->left = u_bst_node_promote_nth(node->left, n);
00836         node = u_bst_rotate(node, U_BST_ROT_RIGHT);
00837     }
00838 
00839     if (t < n)
00840     {
00841         node->right = u_bst_node_promote_nth(node->right, n - (t + 1));
00842         node = u_bst_rotate(node, U_BST_ROT_LEFT);
00843     }
00844 
00845     return node;
00846 }
00847 
00848 static u_bst_node_t *u_bst_node_join_lr (u_bst_node_t *l, u_bst_node_t *r)
00849 {
00850     if (r == NULL)
00851         return l;
00852 
00853     /* Make the smallest node in the right subtree the new BST root. */
00854     r = u_bst_node_promote_nth(r, 0);
00855 
00856     /* Let the left subtree become the left child of the new root. */
00857     r->left = l;
00858 
00859     return r;
00860 }
00861 
00862 static u_bst_node_t *u_bst_node_delete (u_bst_t *bst, u_bst_node_t *node, 
00863         const void *key, int *pfound)
00864 {
00865     int rc;
00866     u_bst_node_t *delnode;
00867 
00868     if (node == NULL)
00869         return NULL;
00870 
00871     /* Search on the left subtree. */
00872     if ((rc = bst->cmp(key, node->key)) < 0)
00873         node->left = u_bst_node_delete(bst, node->left, key, pfound);
00874 
00875     /* Search on the right subtree. */
00876     if (rc > 0)
00877         node->right = u_bst_node_delete(bst, node->right, key, pfound);
00878 
00879     /* Found ! Evict it. */
00880     if (rc == 0)
00881     {
00882         delnode = node;
00883         node = u_bst_node_join_lr(node->left, node->right);
00884         u_bst_node_do_free(bst, delnode);
00885         *pfound = 1;
00886     }
00887 
00888     /* Update child nodes' counter. */
00889     if (node && *pfound)
00890         node->nelem -= 1;
00891 
00892     return node;
00893 }
00894 
00895 static u_bst_node_t *u_bst_node_find_nth (u_bst_node_t *node, size_t n)
00896 {
00897     size_t t;
00898 
00899     if (node == NULL)
00900         return NULL;
00901 
00902     /* Store the number of elements in the left subtree. */
00903     t = (node->left == NULL) ? 0 : node->left->nelem;
00904 
00905     /* If 't' is smaller than the searched index, the n-th node hides
00906      * in the left subtree. */
00907     if (t > n)
00908         return u_bst_node_find_nth(node->left, n);   
00909 
00910     /* If 't' is larger than the searched index, the n-th node hides
00911      * in the right subtree at index n-(t+1). */
00912     if (t < n)
00913         return u_bst_node_find_nth(node->right, n - (t + 1));
00914 
00915     return node;    /* Found ! */
00916 }
00917 
00918 static u_bst_node_t *u_bst_node_balance (u_bst_node_t *node)
00919 {
00920     if (node == NULL || node->nelem < 2)
00921         return node;
00922 
00923     /* Promote the median node to the BST root. */
00924     node = u_bst_node_promote_nth(node, node->nelem / 2);
00925 
00926     /* Then go recursively into its subtrees. */
00927     node->left  = u_bst_node_balance(node->left);
00928     node->right = u_bst_node_balance(node->right);
00929 
00930     return node;
00931 }
00932 
00933 static int u_bst_keycmp (const void *a, const void *b)
00934 {
00935     return strcmp((const char *) a, (const char *) b);
00936 }
00937 
00938 static void u_bst_genfree (void *p) 
00939 { 
00940     u_free(p); 
00941     return;
00942 }
00943 
00944 #ifdef BST_DEBUG
00945 static int u_bst_keycmp_dbg (const void *a, const void *b)
00946 {
00947     int rc;
00948     const char *x = (const char *) a;
00949     const char *y = (const char *) b;
00950 
00951     rc = strcmp(x, y);
00952     u_con("%s %c %s", x, (rc == 0) ? '=' : (rc > 0) ? '>' : '<', y);
00953 
00954     return rc;
00955 }
00956 #endif  /* BST_DEBUG */

←Products
© 2005-2012 - KoanLogic S.r.l. - All rights reserved