/* Rax -- A radix tree implementation. * * Copyright (c) 2017, Salvatore Sanfilippo * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * * Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Redis nor the names of its contributors may be used * to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include "rax.h" //#ifndef RAX_MALLOC_INCLUDE //#define RAX_MALLOC_INCLUDE "rax_malloc.h" //#endif // //#include RAX_MALLOC_INCLUDE "rax_malloc.h" #include "rax_malloc.h" /* This is a special pointer that is guaranteed to never have the same value * of a radix tree node. It's used in order to report "not found" error without * requiring the function to have multiple return values. */ void *raxNotFound = (void *) "rax-not-found-pointer"; /* -------------------------------- Debugging ------------------------------ */ void raxDebugShowNode(const char *msg, raxNode *n); /* Turn debugging messages on/off. */ #if 0 #define debugf(...) \ do { \ printf("%s:%s:%d:\t", __FILE__, __FUNCTION__, __LINE__); \ printf(__VA_ARGS__); \ fflush(stdout); \ } while (0); #define debugnode(msg,n) raxDebugShowNode(msg,n) #else #define debugf(...) #define debugnode(msg, n) #endif /* ------------------------- raxStack functions -------------------------- * The raxStack is a simple stack of pointers that is capable of switching * from using a stack-allocated array to dynamic heap once a given number of * items are reached. It is used in order to retain the list of parent nodes * while walking the radix tree in order to implement certain operations that * need to navigate the tree upward. * ------------------------------------------------------------------------- */ /* Initialize the stack. */ static inline void raxStackInit(raxStack *ts) { ts->stack = ts->static_items; ts->items = 0; ts->maxitems = RAX_STACK_STATIC_ITEMS; ts->oom = 0; } /* Push an item into the stack, returns 1 on success, 0 on out of memory. */ static inline int raxStackPush(raxStack *ts, void *ptr) { //int raxStackPush(raxStack *ts, void *ptr) { if (ts->items == ts->maxitems) { if (ts->stack == ts->static_items) { ts->stack = rax_malloc(sizeof(void *) * ts->maxitems * 2); if (ts->stack == NULL) { ts->stack = ts->static_items; ts->oom = 1; errno = ENOMEM; return 0; } memcpy(ts->stack, ts->static_items, sizeof(void *) * ts->maxitems); } else { void **newalloc = rax_realloc(ts->stack, sizeof(void *) * ts->maxitems * 2); if (newalloc == NULL) { ts->oom = 1; errno = ENOMEM; return 0; } ts->stack = newalloc; } ts->maxitems *= 2; } ts->stack[ts->items] = ptr; ts->items++; return 1; } /* Pop an item from the stack, the function returns NULL if there are no * items to pop. */ static inline void *raxStackPop(raxStack *ts) { if (ts->items == 0) return NULL; ts->items--; return ts->stack[ts->items]; } /* Return the stack item at the top of the stack without actually consuming * it. */ static inline void *raxStackPeek(raxStack *ts) { if (ts->items == 0) return NULL; return ts->stack[ts->items - 1]; } /* Free the stack in case we used heap allocation. */ static inline void raxStackFree(raxStack *ts) { if (ts->stack != ts->static_items) rax_free(ts->stack); } /* ---------------------------------------------------------------------------- * Radix tree implementation * --------------------------------------------------------------------------*/ /* Allocate a new non compressed node with the specified number of children. * If datafiled is true, the allocation is made large enough to hold the * associated data pointer. * Returns the new node pointer. On out of memory NULL is returned. */ raxNode *raxNewNode(size_t children, int datafield) { size_t nodesize = sizeof(raxNode) + children + sizeof(raxNode *) * children; if (datafield) nodesize += sizeof(void *); raxNode *node = rax_malloc(nodesize); if (node == NULL) return NULL; node->iskey = 0; node->isnull = 0; node->iscompr = 0; node->size = children; return node; } /* Allocate a new rax and return its pointer. On out of memory the function * returns NULL. */ rax *raxNew(void) { rax *rax = rax_malloc(sizeof(*rax)); if (rax == NULL) return NULL; rax->numele = 0; rax->numnodes = 1; rax->head = raxNewNode(0, 0); if (rax->head == NULL) { rax_free(rax); return NULL; } else { return rax; } } /* Return the current total size of the node. */ #define raxNodeCurrentLength(n) ( \ sizeof(raxNode)+(n)->size+ \ ((n)->iscompr ? sizeof(raxNode*) : sizeof(raxNode*)*(n)->size)+ \ (((n)->iskey && !(n)->isnull)*sizeof(void*)) \ ) /* realloc the node to make room for auxiliary data in order * to store an item in that node. On out of memory NULL is returned. */ raxNode *raxReallocForData(raxNode *n, void *data) { if (data == NULL) return n; /* No reallocation needed, setting isnull=1 */ size_t curlen = raxNodeCurrentLength(n); return rax_realloc(n, curlen + sizeof(void *)); } /* Set the node auxiliary data to the specified pointer. */ void raxSetData(raxNode *n, void *data) { n->iskey = 1; if (data != NULL) { n->isnull = 0; void **ndata = (void **) ((char *) n + raxNodeCurrentLength(n) - sizeof(void *)); memcpy(ndata, &data, sizeof(data)); } else { n->isnull = 1; } } /* Get the node auxiliary data. */ void *raxGetData(raxNode *n) { if (n->isnull) return NULL; void **ndata = (void **) ((char *) n + raxNodeCurrentLength(n) - sizeof(void *)); void *data; memcpy(&data, ndata, sizeof(data)); return data; } /* Add a new child to the node 'n' representing the character 'c' and return * its new pointer, as well as the child pointer by reference. Additionally * '***parentlink' is populated with the raxNode pointer-to-pointer of where * the new child was stored, which is useful for the caller to replace the * child pointer if it gets reallocated. * * On success the new parent node pointer is returned (it may change because * of the realloc, so the caller should discard 'n' and use the new value). * On out of memory NULL is returned, and the old node is still valid. */ raxNode *raxAddChild(raxNode *n, unsigned char c, raxNode **childptr, raxNode ***parentlink) { assert(n->iscompr == 0); size_t curlen = sizeof(raxNode) + n->size + sizeof(raxNode *) * n->size; size_t newlen; /* Alloc the new child we will link to 'n'. */ raxNode *child = raxNewNode(0, 0); if (child == NULL) return NULL; /* Make space in the original node. */ if (n->iskey) curlen += sizeof(void *); newlen = curlen + sizeof(raxNode *) + 1; /* Add 1 char and 1 pointer. */ raxNode *newn = rax_realloc(n, newlen); if (newn == NULL) { rax_free(child); return NULL; } n = newn; /* After the reallocation, we have 5/9 (depending on the system * pointer size) bytes at the end, that is, the additional char * in the 'data' section, plus one pointer to the new child: * * [numc][abx][ap][bp][xp]|auxp|..... * * Let's find where to insert the new child in order to make sure * it is inserted in-place lexicographically. */ int pos; for (pos = 0; pos < n->size; pos++) { if (n->data[pos] > c) break; } /* Now, if present, move auxiliary data pointer at the end * so that we can mess with the other data without overwriting it. * We will obtain something like that: * * [numc][abx][ap][bp][xp].....|auxp| */ unsigned char *src; if (n->iskey && !n->isnull) { src = n->data + n->size + sizeof(raxNode *) * n->size; memmove(src + 1 + sizeof(raxNode *), src, sizeof(void *)); } /* Now imagine we are adding a node with edge 'c'. The insertion * point is between 'b' and 'x', so the 'pos' variable value is * To start, move all the child pointers after the insertion point * of 1+sizeof(pointer) bytes on the right, to obtain: * * [numc][abx][ap][bp].....[xp]|auxp| */ src = n->data + n->size + sizeof(raxNode *) * pos; memmove(src + 1 + sizeof(raxNode *), src, sizeof(raxNode *) * (n->size - pos)); /* Now make the space for the additional char in the data section, * but also move the pointers before the insertion point in the right * by 1 byte, in order to obtain the following: * * [numc][ab.x][ap][bp]....[xp]|auxp| */ src = n->data + pos; memmove(src + 1, src, n->size - pos + sizeof(raxNode *) * pos); /* We can now set the character and its child node pointer to get: * * [numc][abcx][ap][bp][cp]....|auxp| * [numc][abcx][ap][bp][cp][xp]|auxp| */ n->data[pos] = c; n->size++; raxNode **childfield = (raxNode **) (n->data + n->size + sizeof(raxNode *) * pos); memcpy(childfield, &child, sizeof(child)); *childptr = child; *parentlink = childfield; return n; } /* Return the pointer to the last child pointer in a node. For the compressed * nodes this is the only child pointer. */ #define raxNodeLastChildPtr(n) ((raxNode**) ( \ ((char*)(n)) + \ raxNodeCurrentLength(n) - \ sizeof(raxNode*) - \ (((n)->iskey && !(n)->isnull) ? sizeof(void*) : 0) \ )) /* Return the pointer to the first child pointer. */ #define raxNodeFirstChildPtr(n) ((raxNode**)((n)->data+(n)->size)) /* Turn the node 'n', that must be a node without any children, into a * compressed node representing a set of nodes linked one after the other * and having exactly one child each. The node can be a key or not: this * property and the associated value if any will be preserved. * * The function also returns a child node, since the last node of the * compressed chain cannot be part of the chain: it has zero children while * we can only compress inner nodes with exactly one child each. */ raxNode *raxCompressNode(raxNode *n, unsigned char *s, size_t len, raxNode **child) { assert(n->size == 0 && n->iscompr == 0); void *data = NULL; /* Initialized only to avoid warnings. */ size_t newsize; debugf("Compress node: %.*s\n", (int) len, s); /* Allocate the child to link to this node. */ *child = raxNewNode(0, 0); if (*child == NULL) return NULL; /* Make space in the parent node. */ newsize = sizeof(raxNode) + len + sizeof(raxNode *); if (n->iskey) { data = raxGetData(n); /* To restore it later. */ if (!n->isnull) newsize += sizeof(void *); } raxNode *newn = rax_realloc(n, newsize); if (newn == NULL) { rax_free(*child); return NULL; } n = newn; n->iscompr = 1; n->size = len; memcpy(n->data, s, len); if (n->iskey) raxSetData(n, data); raxNode **childfield = raxNodeLastChildPtr(n); memcpy(childfield, child, sizeof(*child)); return n; } /* Low level function that walks the tree looking for the string * 's' of 'len' bytes. The function returns the number of characters * of the key that was possible to process: if the returned integer * is the same as 'len', then it means that the node corresponding to the * string was found (however it may not be a key in case the node->iskey is * zero or if simply we stopped in the middle of a compressed node, so that * 'splitpos' is non zero). * * Otherwise if the returned integer is not the same as 'len', there was an * early stop during the tree walk because of a character mismatch. * * The node where the search ended (because the full string was processed * or because there was an early stop) is returned by reference as * '*stopnode' if the passed pointer is not NULL. This node link in the * parent's node is returned as '*plink' if not NULL. Finally, if the * search stopped in a compressed node, '*splitpos' returns the index * inside the compressed node where the search ended. This is useful to * know where to split the node for insertion. * * Note that when we stop in the middle of a compressed node with * a perfect match, this function will return a length equal to the * 'len' argument (all the key matched), and will return a *splitpos which is * always positive (that will represent the index of the character immediately * *after* the last match in the current compressed node). * * When instead we stop at a compressed node and *splitpos is zero, it * means that the current node represents the key (that is, none of the * compressed node characters are needed to represent the key, just all * its parents nodes). */ static inline size_t raxLowWalk(rax *rax, unsigned char *s, size_t len, raxNode **stopnode, raxNode ***plink, int *splitpos, raxStack *ts) { raxNode *h = rax->head; raxNode **parentlink = &rax->head; size_t i = 0; /* Position in the string. */ size_t j = 0; /* Position in the node children (or bytes if compressed).*/ while (h->size && i < len) { debugnode("Lookup current node", h); unsigned char *v = h->data; if (h->iscompr) { for (j = 0; j < h->size && i < len; j++, i++) { if (v[j] != s[i]) break; } if (j != h->size) break; } else { /* Even when h->size is large, linear scan provides good * performances compared to other approaches that are in theory * more sounding, like performing a binary search. */ for (j = 0; j < h->size; j++) { if (v[j] == s[i]) break; } if (j == h->size) break; i++; } if (ts) raxStackPush(ts, h); /* Save stack of parent nodes. */ raxNode **children = raxNodeFirstChildPtr(h); if (h->iscompr) j = 0; /* Compressed node only child is at index 0. */ memcpy(&h, children + j, sizeof(h)); parentlink = children + j; j = 0; /* If the new node is compressed and we do not iterate again (since i == l) set the split position to 0 to signal this node represents the searched key. */ } debugnode("Lookup stop node is", h); if (stopnode) *stopnode = h; if (plink) *plink = parentlink; if (splitpos && h->iscompr) *splitpos = j; return i; } /* Insert the element 's' of size 'len', setting as auxiliary data * the pointer 'data'. If the element is already present, the associated * data is updated (only if 'overwrite' is set to 1), and 0 is returned, * otherwise the element is inserted and 1 is returned. On out of memory the * function returns 0 as well but sets errno to ENOMEM, otherwise errno will * be set to 0. */ int raxGenericInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old, int overwrite) { size_t i; int j = 0; /* Split position. If raxLowWalk() stops in a compressed node, the index 'j' represents the char we stopped within the compressed node, that is, the position where to split the node for insertion. */ raxNode *h, **parentlink; debugf("### Insert %.*s with value %p\n", (int) len, s, data); i = raxLowWalk(rax, s, len, &h, &parentlink, &j, NULL); /* If i == len we walked following the whole string. If we are not * in the middle of a compressed node, the string is either already * inserted or this middle node is currently not a key, but can represent * our key. We have just to reallocate the node and make space for the * data pointer. */ if (i == len && (!h->iscompr || j == 0 /* not in the middle if j is 0 */)) { debugf("### Insert: node representing key exists\n"); /* Make space for the value pointer if needed. */ if (!h->iskey || (h->isnull && overwrite)) { h = raxReallocForData(h, data); if (h) memcpy(parentlink, &h, sizeof(h)); } if (h == NULL) { errno = ENOMEM; return 0; } /* Update the existing key if there is already one. */ if (h->iskey) { if (old) *old = raxGetData(h); if (overwrite) raxSetData(h, data); errno = 0; return 0; /* Element already exists. */ } /* Otherwise set the node as a key. Note that raxSetData() * will set h->iskey. */ raxSetData(h, data); rax->numele++; return 1; /* Element inserted. */ } /* If the node we stopped at is a compressed node, we need to * split it before to continue. * * Splitting a compressed node have a few possibile cases. * Imagine that the node 'h' we are currently at is a compressed * node contaning the string "ANNIBALE" (it means that it represents * nodes A -> N -> N -> I -> B -> A -> L -> E with the only child * pointer of this node pointing at the 'E' node, because remember that * we have characters at the edges of the graph, not inside the nodes * themselves. * * In order to show a real case imagine our node to also point to * another compressed node, that finally points at the node without * children, representing 'O': * * "ANNIBALE" -> "SCO" -> [] * * When inserting we may face the following cases. Note that all the cases * require the insertion of a non compressed node with exactly two * children, except for the last case which just requires splitting a * compressed node. * * 1) Inserting "ANNIENTARE" * * |B| -> "ALE" -> "SCO" -> [] * "ANNI" -> |-| * |E| -> (... continue algo ...) "NTARE" -> [] * * 2) Inserting "ANNIBALI" * * |E| -> "SCO" -> [] * "ANNIBAL" -> |-| * |I| -> (... continue algo ...) [] * * 3) Inserting "AGO" (Like case 1, but set iscompr = 0 into original node) * * |N| -> "NIBALE" -> "SCO" -> [] * |A| -> |-| * |G| -> (... continue algo ...) |O| -> [] * * 4) Inserting "CIAO" * * |A| -> "NNIBALE" -> "SCO" -> [] * |-| * |C| -> (... continue algo ...) "IAO" -> [] * * 5) Inserting "ANNI" * * "ANNI" -> "BALE" -> "SCO" -> [] * * The final algorithm for insertion covering all the above cases is as * follows. * * ============================= ALGO 1 ============================= * * For the above cases 1 to 4, that is, all cases where we stopped in * the middle of a compressed node for a character mismatch, do: * * Let $SPLITPOS be the zero-based index at which, in the * compressed node array of characters, we found the mismatching * character. For example if the node contains "ANNIBALE" and we add * "ANNIENTARE" the $SPLITPOS is 4, that is, the index at which the * mismatching character is found. * * 1. Save the current compressed node $NEXT pointer (the pointer to the * child element, that is always present in compressed nodes). * * 2. Create "split node" having as child the non common letter * at the compressed node. The other non common letter (at the key) * will be added later as we continue the normal insertion algorithm * at step "6". * * 3a. IF $SPLITPOS == 0: * Replace the old node with the split node, by copying the auxiliary * data if any. Fix parent's reference. Free old node eventually * (we still need its data for the next steps of the algorithm). * * 3b. IF $SPLITPOS != 0: * Trim the compressed node (reallocating it as well) in order to * contain $splitpos characters. Change chilid pointer in order to link * to the split node. If new compressed node len is just 1, set * iscompr to 0 (layout is the same). Fix parent's reference. * * 4a. IF the postfix len (the length of the remaining string of the * original compressed node after the split character) is non zero, * create a "postfix node". If the postfix node has just one character * set iscompr to 0, otherwise iscompr to 1. Set the postfix node * child pointer to $NEXT. * * 4b. IF the postfix len is zero, just use $NEXT as postfix pointer. * * 5. Set child[0] of split node to postfix node. * * 6. Set the split node as the current node, set current index at child[1] * and continue insertion algorithm as usually. * * ============================= ALGO 2 ============================= * * For case 5, that is, if we stopped in the middle of a compressed * node but no mismatch was found, do: * * Let $SPLITPOS be the zero-based index at which, in the * compressed node array of characters, we stopped iterating because * there were no more keys character to match. So in the example of * the node "ANNIBALE", addig the string "ANNI", the $SPLITPOS is 4. * * 1. Save the current compressed node $NEXT pointer (the pointer to the * child element, that is always present in compressed nodes). * * 2. Create a "postfix node" containing all the characters from $SPLITPOS * to the end. Use $NEXT as the postfix node child pointer. * If the postfix node length is 1, set iscompr to 0. * Set the node as a key with the associated value of the new * inserted key. * * 3. Trim the current node to contain the first $SPLITPOS characters. * As usually if the new node length is just 1, set iscompr to 0. * Take the iskey / associated value as it was in the orignal node. * Fix the parent's reference. * * 4. Set the postfix node as the only child pointer of the trimmed * node created at step 1. */ /* ------------------------- ALGORITHM 1 --------------------------- */ if (h->iscompr && i != len) { debugf("ALGO 1: Stopped at compressed node %.*s (%p)\n", h->size, h->data, (void *) h); debugf("Still to insert: %.*s\n", (int) (len - i), s + i); debugf("Splitting at %d: '%c'\n", j, ((char *) h->data)[j]); debugf("Other (key) letter is '%c'\n", s[i]); /* 1: Save next pointer. */ raxNode **childfield = raxNodeLastChildPtr(h); raxNode *next; memcpy(&next, childfield, sizeof(next)); debugf("Next is %p\n", (void *) next); debugf("iskey %d\n", h->iskey); if (h->iskey) { debugf("key value is %p\n", raxGetData(h)); } /* Set the length of the additional nodes we will need. */ size_t trimmedlen = j; size_t postfixlen = h->size - j - 1; int split_node_is_key = !trimmedlen && h->iskey && !h->isnull; size_t nodesize; /* 2: Create the split node. Also allocate the other nodes we'll need * ASAP, so that it will be simpler to handle OOM. */ raxNode *splitnode = raxNewNode(1, split_node_is_key); raxNode *trimmed = NULL; raxNode *postfix = NULL; if (trimmedlen) { nodesize = sizeof(raxNode) + trimmedlen + sizeof(raxNode *); if (h->iskey && !h->isnull) nodesize += sizeof(void *); trimmed = rax_malloc(nodesize); } if (postfixlen) { nodesize = sizeof(raxNode) + postfixlen + sizeof(raxNode *); postfix = rax_malloc(nodesize); } /* OOM? Abort now that the tree is untouched. */ if (splitnode == NULL || (trimmedlen && trimmed == NULL) || (postfixlen && postfix == NULL)) { rax_free(splitnode); rax_free(trimmed); rax_free(postfix); errno = ENOMEM; return 0; } splitnode->data[0] = h->data[j]; if (j == 0) { /* 3a: Replace the old node with the split node. */ if (h->iskey) { void *ndata = raxGetData(h); raxSetData(splitnode, ndata); } memcpy(parentlink, &splitnode, sizeof(splitnode)); } else { /* 3b: Trim the compressed node. */ trimmed->size = j; memcpy(trimmed->data, h->data, j); trimmed->iscompr = j > 1 ? 1 : 0; trimmed->iskey = h->iskey; trimmed->isnull = h->isnull; if (h->iskey && !h->isnull) { void *ndata = raxGetData(h); raxSetData(trimmed, ndata); } raxNode **cp = raxNodeLastChildPtr(trimmed); memcpy(cp, &splitnode, sizeof(splitnode)); memcpy(parentlink, &trimmed, sizeof(trimmed)); parentlink = cp; /* Set parentlink to splitnode parent. */ rax->numnodes++; } /* 4: Create the postfix node: what remains of the original * compressed node after the split. */ if (postfixlen) { /* 4a: create a postfix node. */ postfix->iskey = 0; postfix->isnull = 0; postfix->size = postfixlen; postfix->iscompr = postfixlen > 1; memcpy(postfix->data, h->data + j + 1, postfixlen); raxNode **cp = raxNodeLastChildPtr(postfix); memcpy(cp, &next, sizeof(next)); rax->numnodes++; } else { /* 4b: just use next as postfix node. */ postfix = next; } /* 5: Set splitnode first child as the postfix node. */ raxNode **splitchild = raxNodeLastChildPtr(splitnode); memcpy(splitchild, &postfix, sizeof(postfix)); /* 6. Continue insertion: this will cause the splitnode to * get a new child (the non common character at the currently * inserted key). */ rax_free(h); h = splitnode; } else if (h->iscompr && i == len) { /* ------------------------- ALGORITHM 2 --------------------------- */ debugf("ALGO 2: Stopped at compressed node %.*s (%p) j = %d\n", h->size, h->data, (void *) h, j); /* Allocate postfix & trimmed nodes ASAP to fail for OOM gracefully. */ size_t postfixlen = h->size - j; size_t nodesize = sizeof(raxNode) + postfixlen + sizeof(raxNode *); if (data != NULL) nodesize += sizeof(void *); raxNode *postfix = rax_malloc(nodesize); nodesize = sizeof(raxNode) + j + sizeof(raxNode *); if (h->iskey && !h->isnull) nodesize += sizeof(void *); raxNode *trimmed = rax_malloc(nodesize); if (postfix == NULL || trimmed == NULL) { rax_free(postfix); rax_free(trimmed); errno = ENOMEM; return 0; } /* 1: Save next pointer. */ raxNode **childfield = raxNodeLastChildPtr(h); raxNode *next; memcpy(&next, childfield, sizeof(next)); /* 2: Create the postfix node. */ postfix->size = postfixlen; postfix->iscompr = postfixlen > 1; postfix->iskey = 1; postfix->isnull = 0; memcpy(postfix->data, h->data + j, postfixlen); raxSetData(postfix, data); raxNode **cp = raxNodeLastChildPtr(postfix); memcpy(cp, &next, sizeof(next)); rax->numnodes++; /* 3: Trim the compressed node. */ trimmed->size = j; trimmed->iscompr = j > 1; trimmed->iskey = 0; trimmed->isnull = 0; memcpy(trimmed->data, h->data, j); memcpy(parentlink, &trimmed, sizeof(trimmed)); if (h->iskey) { void *aux = raxGetData(h); raxSetData(trimmed, aux); } /* Fix the trimmed node child pointer to point to * the postfix node. */ cp = raxNodeLastChildPtr(trimmed); memcpy(cp, &postfix, sizeof(postfix)); /* Finish! We don't need to contine with the insertion * algorithm for ALGO 2. The key is already inserted. */ rax->numele++; rax_free(h); return 1; /* Key inserted. */ } /* We walked the radix tree as far as we could, but still there are left * chars in our string. We need to insert the missing nodes. */ while (i < len) { raxNode *child; /* If this node is going to have a single child, and there * are other characters, so that that would result in a chain * of single-childed nodes, turn it into a compressed node. */ if (h->size == 0 && len - i > 1) { debugf("Inserting compressed node\n"); size_t comprsize = len - i; if (comprsize > RAX_NODE_MAX_SIZE) comprsize = RAX_NODE_MAX_SIZE; raxNode *newh = raxCompressNode(h, s + i, comprsize, &child); if (newh == NULL) goto oom; h = newh; memcpy(parentlink, &h, sizeof(h)); parentlink = raxNodeLastChildPtr(h); i += comprsize; } else { debugf("Inserting normal node\n"); raxNode **new_parentlink; raxNode *newh = raxAddChild(h, s[i], &child, &new_parentlink); if (newh == NULL) goto oom; h = newh; memcpy(parentlink, &h, sizeof(h)); parentlink = new_parentlink; i++; } rax->numnodes++; h = child; } raxNode *newh = raxReallocForData(h, data); if (newh == NULL) goto oom; h = newh; if (!h->iskey) rax->numele++; raxSetData(h, data); memcpy(parentlink, &h, sizeof(h)); return 1; /* Element inserted. */ oom: /* This code path handles out of memory after part of the sub-tree was * already modified. Set the node as a key, and then remove it. However we * do that only if the node is a terminal node, otherwise if the OOM * happened reallocating a node in the middle, we don't need to free * anything. */ if (h->size == 0) { h->isnull = 1; h->iskey = 1; rax->numele++; /* Compensate the next remove. */ assert(raxRemove(rax, s, i, NULL) != 0); } errno = ENOMEM; return 0; } /* Overwriting insert. Just a wrapper for raxGenericInsert() that will * update the element if there is already one for the same key. */ int raxInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old) { return raxGenericInsert(rax, s, len, data, old, 1); } /* Non overwriting insert function: this if an element with the same key * exists, the value is not updated and the function returns 0. * This is a just a wrapper for raxGenericInsert(). */ int raxTryInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old) { return raxGenericInsert(rax, s, len, data, old, 0); } /* Find a key in the rax, returns raxNotFound special void pointer value * if the item was not found, otherwise the value associated with the * item is returned. */ void *raxFind(rax *rax, unsigned char *s, size_t len) { raxNode *h; debugf("### Lookup: %.*s\n", (int) len, s); int splitpos = 0; size_t i = raxLowWalk(rax, s, len, &h, NULL, &splitpos, NULL); if (i != len || (h->iscompr && splitpos != 0) || !h->iskey) return raxNotFound; return raxGetData(h); } /* Return the memory address where the 'parent' node stores the specified * 'child' pointer, so that the caller can update the pointer with another * one if needed. The function assumes it will find a match, otherwise the * operation is an undefined behavior (it will continue scanning the * memory without any bound checking). */ raxNode **raxFindParentLink(raxNode *parent, raxNode *child) { raxNode **cp = raxNodeFirstChildPtr(parent); raxNode *c; while (1) { memcpy(&c, cp, sizeof(c)); if (c == child) break; cp++; } return cp; } /* Low level child removal from node. The new node pointer (after the child * removal) is returned. Note that this function does not fix the pointer * of the parent node in its parent, so this task is up to the caller. * The function never fails for out of memory. */ raxNode *raxRemoveChild(raxNode *parent, raxNode *child) { debugnode("raxRemoveChild before", parent); /* If parent is a compressed node (having a single child, as for definition * of the data structure), the removal of the child consists into turning * it into a normal node without children. */ if (parent->iscompr) { void *data = NULL; if (parent->iskey) data = raxGetData(parent); parent->isnull = 0; parent->iscompr = 0; parent->size = 0; if (parent->iskey) raxSetData(parent, data); debugnode("raxRemoveChild after", parent); return parent; } /* Otherwise we need to scan for the children pointer and memmove() * accordingly. * * 1. To start we seek the first element in both the children * pointers and edge bytes in the node. */ raxNode **cp = raxNodeFirstChildPtr(parent); raxNode **c = cp; unsigned char *e = parent->data; /* 2. Search the child pointer to remove inside the array of children * pointers. */ while (1) { raxNode *aux; memcpy(&aux, c, sizeof(aux)); if (aux == child) break; c++; e++; } /* 3. Remove the edge and the pointer by memmoving the remaining children * pointer and edge bytes one position before. */ int taillen = parent->size - (e - parent->data) - 1; debugf("raxRemoveChild tail len: %d\n", taillen); memmove(e, e + 1, taillen); /* Since we have one data byte less, also child pointers start one byte * before now. */ memmove(((char *) cp) - 1, cp, (parent->size - taillen - 1) * sizeof(raxNode **)); /* Move the remaining "tail" pointer at the right position as well. */ size_t valuelen = (parent->iskey && !parent->isnull) ? sizeof(void *) : 0; memmove(((char *) c) - 1, c + 1, taillen * sizeof(raxNode **) + valuelen); /* 4. Update size. */ parent->size--; /* realloc the node according to the theoretical memory usage, to free * data if we are over-allocating right now. */ raxNode *newnode = rax_realloc(parent, raxNodeCurrentLength(parent)); if (newnode) { debugnode("raxRemoveChild after", newnode); } /* Note: if rax_realloc() fails we just return the old address, which * is valid. */ return newnode ? newnode : parent; } /* Remove the specified item. Returns 1 if the item was found and * deleted, 0 otherwise. */ int raxRemove(rax *rax, unsigned char *s, size_t len, void **old) { raxNode *h; raxStack ts; debugf("### Delete: %.*s\n", (int) len, s); raxStackInit(&ts); int splitpos = 0; size_t i = raxLowWalk(rax, s, len, &h, NULL, &splitpos, &ts); if (i != len || (h->iscompr && splitpos != 0) || !h->iskey) { raxStackFree(&ts); return 0; } if (old) *old = raxGetData(h); h->iskey = 0; rax->numele--; /* If this node has no children, the deletion needs to reclaim the * no longer used nodes. This is an iterative process that needs to * walk the three upward, deleting all the nodes with just one child * that are not keys, until the head of the rax is reached or the first * node with more than one child is found. */ int trycompress = 0; /* Will be set to 1 if we should try to optimize the tree resulting from the deletion. */ if (h->size == 0) { debugf("Key deleted in node without children. Cleanup needed.\n"); raxNode *child = NULL; while (h != rax->head) { child = h; debugf("Freeing child %p [%.*s] key:%d\n", (void *) child, (int) child->size, (char *) child->data, child->iskey); rax_free(child); rax->numnodes--; h = raxStackPop(&ts); /* If this node has more then one child, or actually holds * a key, stop here. */ if (h->iskey || (!h->iscompr && h->size != 1)) break; } if (child) { debugf("Unlinking child %p from parent %p\n", (void *) child, (void *) h); raxNode * new = raxRemoveChild(h, child); if (new != h) { raxNode *parent = raxStackPeek(&ts); raxNode **parentlink; if (parent == NULL) { parentlink = &rax->head; } else { parentlink = raxFindParentLink(parent, h); } memcpy(parentlink, &new, sizeof(new)); } /* If after the removal the node has just a single child * and is not a key, we need to try to compress it. */ if (new->size == 1 && new->iskey == 0) { trycompress = 1; h = new; } } } else if (h->size == 1) { /* If the node had just one child, after the removal of the key * further compression with adjacent nodes is pontentially possible. */ trycompress = 1; } /* Don't try node compression if our nodes pointers stack is not * complete because of OOM while executing raxLowWalk() */ if (trycompress && ts.oom) trycompress = 0; /* Recompression: if trycompress is true, 'h' points to a radix tree node * that changed in a way that could allow to compress nodes in this * sub-branch. Compressed nodes represent chains of nodes that are not * keys and have a single child, so there are two deletion events that * may alter the tree so that further compression is needed: * * 1) A node with a single child was a key and now no longer is a key. * 2) A node with two children now has just one child. * * We try to navigate upward till there are other nodes that can be * compressed, when we reach the upper node which is not a key and has * a single child, we scan the chain of children to collect the * compressable part of the tree, and replace the current node with the * new one, fixing the child pointer to reference the first non * compressable node. * * Example of case "1". A tree stores the keys "FOO" = 1 and * "FOOBAR" = 2: * * * "FOO" -> "BAR" -> [] (2) * (1) * * After the removal of "FOO" the tree can be compressed as: * * "FOOBAR" -> [] (2) * * * Example of case "2". A tree stores the keys "FOOBAR" = 1 and * "FOOTER" = 2: * * |B| -> "AR" -> [] (1) * "FOO" -> |-| * |T| -> "ER" -> [] (2) * * After the removal of "FOOTER" the resulting tree is: * * "FOO" -> |B| -> "AR" -> [] (1) * * That can be compressed into: * * "FOOBAR" -> [] (1) */ if (trycompress) { debugf("After removing %.*s:\n", (int) len, s); debugnode("Compression may be needed", h); debugf("Seek start node\n"); /* Try to reach the upper node that is compressible. * At the end of the loop 'h' will point to the first node we * can try to compress and 'parent' to its parent. */ raxNode *parent; while (1) { parent = raxStackPop(&ts); if (!parent || parent->iskey || (!parent->iscompr && parent->size != 1)) break; h = parent; debugnode("Going up to", h); } raxNode *start = h; /* Compression starting node. */ /* Scan chain of nodes we can compress. */ size_t comprsize = h->size; int nodes = 1; while (h->size != 0) { raxNode **cp = raxNodeLastChildPtr(h); memcpy(&h, cp, sizeof(h)); if (h->iskey || (!h->iscompr && h->size != 1)) break; /* Stop here if going to the next node would result into * a compressed node larger than h->size can hold. */ if (comprsize + h->size > RAX_NODE_MAX_SIZE) break; nodes++; comprsize += h->size; } if (nodes > 1) { /* If we can compress, create the new node and populate it. */ size_t nodesize = sizeof(raxNode) + comprsize + sizeof(raxNode *); raxNode * new = rax_malloc(nodesize); /* An out of memory here just means we cannot optimize this * node, but the tree is left in a consistent state. */ if (new == NULL) { raxStackFree(&ts); return 1; } new->iskey = 0; new->isnull = 0; new->iscompr = 1; new->size = comprsize; rax->numnodes++; /* Scan again, this time to populate the new node content and * to fix the new node child pointer. At the same time we free * all the nodes that we'll no longer use. */ comprsize = 0; h = start; while (h->size != 0) { memcpy(new->data + comprsize, h->data, h->size); comprsize += h->size; raxNode **cp = raxNodeLastChildPtr(h); raxNode *tofree = h; memcpy(&h, cp, sizeof(h)); rax_free(tofree); rax->numnodes--; if (h->iskey || (!h->iscompr && h->size != 1)) break; } debugnode("New node", new); /* Now 'h' points to the first node that we still need to use, * so our new node child pointer will point to it. */ raxNode **cp = raxNodeLastChildPtr(new); memcpy(cp, &h, sizeof(h)); /* Fix parent link. */ if (parent) { raxNode **parentlink = raxFindParentLink(parent, start); memcpy(parentlink, &new, sizeof(new)); } else { rax->head = new; } debugf("Compressed %d nodes, %d total bytes\n", nodes, (int) comprsize); } } raxStackFree(&ts); return 1; } /* This is the core of raxFree(): performs a depth-first scan of the * tree and releases all the nodes found. */ void raxRecursiveFree(rax *rax, raxNode *n, void (*free_callback)(void *)) { debugnode("free traversing", n); int numchildren = n->iscompr ? 1 : n->size; raxNode **cp = raxNodeLastChildPtr(n); while (numchildren--) { raxNode *child; memcpy(&child, cp, sizeof(child)); raxRecursiveFree(rax, child, free_callback); cp--; } debugnode("free depth-first", n); if (free_callback && n->iskey && !n->isnull) free_callback(raxGetData(n)); rax_free(n); rax->numnodes--; } /* Free a whole radix tree, calling the specified callback in order to * free the auxiliary data. */ void raxFreeWithCallback(rax *rax, void (*free_callback)(void *)) { raxRecursiveFree(rax, rax->head, free_callback); assert(rax->numnodes == 0); rax_free(rax); } /* Free a whole radix tree. */ void raxFree(rax *rax) { raxFreeWithCallback(rax, NULL); } /* ------------------------------- Iterator --------------------------------- */ /* Initialize a Rax iterator. This call should be performed a single time * to initialize the iterator, and must be followed by a raxSeek() call, * otherwise the raxPrev()/raxNext() functions will just return EOF. */ void raxStart(raxIterator *it, rax *rt) { it->flags = RAX_ITER_EOF; /* No crash if the iterator is not seeked. */ it->rt = rt; it->key_len = 0; it->key = it->key_static_string; it->key_max = RAX_ITER_STATIC_LEN; it->data = NULL; it->node_cb = NULL; raxStackInit(&it->stack); } /* Append characters at the current key string of the iterator 'it'. This * is a low level function used to implement the iterator, not callable by * the user. Returns 0 on out of memory, otherwise 1 is returned. */ int raxIteratorAddChars(raxIterator *it, unsigned char *s, size_t len) { if (it->key_max < it->key_len + len) { unsigned char *old = (it->key == it->key_static_string) ? NULL : it->key; size_t new_max = (it->key_len + len) * 2; it->key = rax_realloc(old, new_max); if (it->key == NULL) { it->key = (!old) ? it->key_static_string : old; errno = ENOMEM; return 0; } if (old == NULL) memcpy(it->key, it->key_static_string, it->key_len); it->key_max = new_max; } /* Use memmove since there could be an overlap between 's' and * it->key when we use the current key in order to re-seek. */ memmove(it->key + it->key_len, s, len); it->key_len += len; return 1; } /* Remove the specified number of chars from the right of the current * iterator key. */ void raxIteratorDelChars(raxIterator *it, size_t count) { it->key_len -= count; } /* Do an iteration step towards the next element. At the end of the step the * iterator key will represent the (new) current key. If it is not possible * to step in the specified direction since there are no longer elements, the * iterator is flagged with RAX_ITER_EOF. * * If 'noup' is true the function starts directly scanning for the next * lexicographically smaller children, and the current node is already assumed * to be the parent of the last key node, so the first operation to go back to * the parent will be skipped. This option is used by raxSeek() when * implementing seeking a non existing element with the ">" or "<" options: * the starting node is not a key in that particular case, so we start the scan * from a node that does not represent the key set. * * The function returns 1 on success or 0 on out of memory. */ int raxIteratorNextStep(raxIterator *it, int noup) { if (it->flags & RAX_ITER_EOF) { return 1; } else if (it->flags & RAX_ITER_JUST_SEEKED) { it->flags &= ~RAX_ITER_JUST_SEEKED; return 1; } /* Save key len, stack items and the node where we are currently * so that on iterator EOF we can restore the current key and state. */ size_t orig_key_len = it->key_len; size_t orig_stack_items = it->stack.items; raxNode *orig_node = it->node; while (1) { int children = it->node->iscompr ? 1 : it->node->size; if (!noup && children) { debugf("GO DEEPER\n"); /* Seek the lexicographically smaller key in this subtree, which * is the first one found always going torwards the first child * of every successive node. */ if (!raxStackPush(&it->stack, it->node)) return 0; raxNode **cp = raxNodeFirstChildPtr(it->node); if (!raxIteratorAddChars(it, it->node->data, it->node->iscompr ? it->node->size : 1)) return 0; memcpy(&it->node, cp, sizeof(it->node)); /* Call the node callback if any, and replace the node pointer * if the callback returns true. */ if (it->node_cb && it->node_cb(&it->node)) memcpy(cp, &it->node, sizeof(it->node)); /* For "next" step, stop every time we find a key along the * way, since the key is lexicograhically smaller compared to * what follows in the sub-children. */ if (it->node->iskey) { it->data = raxGetData(it->node); return 1; } } else { /* If we finished exporing the previous sub-tree, switch to the * new one: go upper until a node is found where there are * children representing keys lexicographically greater than the * current key. */ while (1) { int old_noup = noup; /* Already on head? Can't go up, iteration finished. */ if (!noup && it->node == it->rt->head) { it->flags |= RAX_ITER_EOF; it->stack.items = orig_stack_items; it->key_len = orig_key_len; it->node = orig_node; return 1; } /* If there are no children at the current node, try parent's * next child. */ unsigned char prevchild = it->key[it->key_len - 1]; if (!noup) { it->node = raxStackPop(&it->stack); } else { noup = 0; } /* Adjust the current key to represent the node we are * at. */ int todel = it->node->iscompr ? it->node->size : 1; raxIteratorDelChars(it, todel); /* Try visiting the next child if there was at least one * additional child. */ if (!it->node->iscompr && it->node->size > (old_noup ? 0 : 1)) { raxNode **cp = raxNodeFirstChildPtr(it->node); int i = 0; while (i < it->node->size) { debugf("SCAN NEXT %c\n", it->node->data[i]); if (it->node->data[i] > prevchild) break; i++; cp++; } if (i != it->node->size) { debugf("SCAN found a new node\n"); raxIteratorAddChars(it, it->node->data + i, 1); if (!raxStackPush(&it->stack, it->node)) return 0; memcpy(&it->node, cp, sizeof(it->node)); /* Call the node callback if any, and replace the node * pointer if the callback returns true. */ if (it->node_cb && it->node_cb(&it->node)) memcpy(cp, &it->node, sizeof(it->node)); if (it->node->iskey) { it->data = raxGetData(it->node); return 1; } break; } } } } } } /* Seek the grestest key in the subtree at the current node. Return 0 on * out of memory, otherwise 1. This is an helper function for different * iteration functions below. */ int raxSeekGreatest(raxIterator *it) { while (it->node->size) { if (it->node->iscompr) { if (!raxIteratorAddChars(it, it->node->data, it->node->size)) return 0; } else { if (!raxIteratorAddChars(it, it->node->data + it->node->size - 1, 1)) return 0; } raxNode **cp = raxNodeLastChildPtr(it->node); if (!raxStackPush(&it->stack, it->node)) return 0; memcpy(&it->node, cp, sizeof(it->node)); } return 1; } /* Like raxIteratorNextStep() but implements an iteration step moving * to the lexicographically previous element. The 'noup' option has a similar * effect to the one of raxIteratorNextStep(). */ int raxIteratorPrevStep(raxIterator *it, int noup) { if (it->flags & RAX_ITER_EOF) { return 1; } else if (it->flags & RAX_ITER_JUST_SEEKED) { it->flags &= ~RAX_ITER_JUST_SEEKED; return 1; } /* Save key len, stack items and the node where we are currently * so that on iterator EOF we can restore the current key and state. */ size_t orig_key_len = it->key_len; size_t orig_stack_items = it->stack.items; raxNode *orig_node = it->node; while (1) { int old_noup = noup; /* Already on head? Can't go up, iteration finished. */ if (!noup && it->node == it->rt->head) { it->flags |= RAX_ITER_EOF; it->stack.items = orig_stack_items; it->key_len = orig_key_len; it->node = orig_node; return 1; } unsigned char prevchild = it->key[it->key_len - 1]; if (!noup) { it->node = raxStackPop(&it->stack); } else { noup = 0; } /* Adjust the current key to represent the node we are * at. */ int todel = it->node->iscompr ? it->node->size : 1; raxIteratorDelChars(it, todel); /* Try visiting the prev child if there is at least one * child. */ if (!it->node->iscompr && it->node->size > (old_noup ? 0 : 1)) { raxNode **cp = raxNodeLastChildPtr(it->node); int i = it->node->size - 1; while (i >= 0) { debugf("SCAN PREV %c\n", it->node->data[i]); if (it->node->data[i] < prevchild) break; i--; cp--; } /* If we found a new subtree to explore in this node, * go deeper following all the last children in order to * find the key lexicographically greater. */ if (i != -1) { debugf("SCAN found a new node\n"); /* Enter the node we just found. */ if (!raxIteratorAddChars(it, it->node->data + i, 1)) return 0; if (!raxStackPush(&it->stack, it->node)) return 0; memcpy(&it->node, cp, sizeof(it->node)); /* Seek sub-tree max. */ if (!raxSeekGreatest(it)) return 0; } } /* Return the key: this could be the key we found scanning a new * subtree, or if we did not find a new subtree to explore here, * before giving up with this node, check if it's a key itself. */ if (it->node->iskey) { it->data = raxGetData(it->node); return 1; } } } /* Seek an iterator at the specified element. * Return 0 if the seek failed for syntax error or out of memory. Otherwise * 1 is returned. When 0 is returned for out of memory, errno is set to * the ENOMEM value. */ int raxSeek(raxIterator *it, const char *op, unsigned char *ele, size_t len) { int eq = 0, lt = 0, gt = 0, first = 0, last = 0; it->stack.items = 0; /* Just resetting. Intialized by raxStart(). */ it->flags |= RAX_ITER_JUST_SEEKED; it->flags &= ~RAX_ITER_EOF; it->key_len = 0; it->node = NULL; /* Set flags according to the operator used to perform the seek. */ if (op[0] == '>') { gt = 1; if (op[1] == '=') eq = 1; } else if (op[0] == '<') { lt = 1; if (op[1] == '=') eq = 1; } else if (op[0] == '=') { eq = 1; } else if (op[0] == '^') { first = 1; } else if (op[0] == '$') { last = 1; } else { errno = 0; return 0; /* Error. */ } /* If there are no elements, set the EOF condition immediately and * return. */ if (it->rt->numele == 0) { it->flags |= RAX_ITER_EOF; return 1; } if (first) { /* Seeking the first key greater or equal to the empty string * is equivalent to seeking the smaller key available. */ return raxSeek(it, ">=", NULL, 0); } if (last) { /* Find the greatest key taking always the last child till a * final node is found. */ it->node = it->rt->head; if (!raxSeekGreatest(it)) return 0; assert(it->node->iskey); it->data = raxGetData(it->node); return 1; } /* We need to seek the specified key. What we do here is to actually * perform a lookup, and later invoke the prev/next key code that * we already use for iteration. */ int splitpos = 0; size_t i = raxLowWalk(it->rt, ele, len, &it->node, NULL, &splitpos, &it->stack); /* Return OOM on incomplete stack info. */ if (it->stack.oom) return 0; if (eq && i == len && (!it->node->iscompr || splitpos == 0) && it->node->iskey) { /* We found our node, since the key matches and we have an * "equal" condition. */ if (!raxIteratorAddChars(it, ele, len)) return 0; /* OOM. */ it->data = raxGetData(it->node); } else if (lt || gt) { /* Exact key not found or eq flag not set. We have to set as current * key the one represented by the node we stopped at, and perform * a next/prev operation to seek. To reconstruct the key at this node * we start from the parent and go to the current node, accumulating * the characters found along the way. */ if (!raxStackPush(&it->stack, it->node)) return 0; for (size_t j = 1; j < it->stack.items; j++) { raxNode *parent = it->stack.stack[j - 1]; raxNode *child = it->stack.stack[j]; if (parent->iscompr) { if (!raxIteratorAddChars(it, parent->data, parent->size)) return 0; } else { raxNode **cp = raxNodeFirstChildPtr(parent); unsigned char *p = parent->data; while (1) { raxNode *aux; memcpy(&aux, cp, sizeof(aux)); if (aux == child) break; cp++; p++; } if (!raxIteratorAddChars(it, p, 1)) return 0; } } raxStackPop(&it->stack); /* We need to set the iterator in the correct state to call next/prev * step in order to seek the desired element. */ debugf("After initial seek: i=%d len=%d key=%.*s\n", (int) i, (int) len, (int) it->key_len, it->key); if (i != len && !it->node->iscompr) { /* If we stopped in the middle of a normal node because of a * mismatch, add the mismatching character to the current key * and call the iterator with the 'noup' flag so that it will try * to seek the next/prev child in the current node directly based * on the mismatching character. */ if (!raxIteratorAddChars(it, ele + i, 1)) return 0; debugf("Seek normal node on mismatch: %.*s\n", (int) it->key_len, (char *) it->key); it->flags &= ~RAX_ITER_JUST_SEEKED; if (lt && !raxIteratorPrevStep(it, 1)) return 0; if (gt && !raxIteratorNextStep(it, 1)) return 0; it->flags |= RAX_ITER_JUST_SEEKED; /* Ignore next call. */ } else if (i != len && it->node->iscompr) { debugf("Compressed mismatch: %.*s\n", (int) it->key_len, (char *) it->key); /* In case of a mismatch within a compressed node. */ int nodechar = it->node->data[splitpos]; int keychar = ele[i]; it->flags &= ~RAX_ITER_JUST_SEEKED; if (gt) { /* If the key the compressed node represents is greater * than our seek element, continue forward, otherwise set the * state in order to go back to the next sub-tree. */ if (nodechar > keychar) { if (!raxIteratorNextStep(it, 0)) return 0; } else { if (!raxIteratorAddChars(it, it->node->data, it->node->size)) return 0; if (!raxIteratorNextStep(it, 1)) return 0; } } if (lt) { /* If the key the compressed node represents is smaller * than our seek element, seek the greater key in this * subtree, otherwise set the state in order to go back to * the previous sub-tree. */ if (nodechar < keychar) { if (!raxSeekGreatest(it)) return 0; it->data = raxGetData(it->node); } else { if (!raxIteratorAddChars(it, it->node->data, it->node->size)) return 0; if (!raxIteratorPrevStep(it, 1)) return 0; } } it->flags |= RAX_ITER_JUST_SEEKED; /* Ignore next call. */ } else { debugf("No mismatch: %.*s\n", (int) it->key_len, (char *) it->key); /* If there was no mismatch we are into a node representing the * key, (but which is not a key or the seek operator does not * include 'eq'), or we stopped in the middle of a compressed node * after processing all the key. Continue iterating as this was * a legitimate key we stopped at. */ it->flags &= ~RAX_ITER_JUST_SEEKED; if (it->node->iscompr && it->node->iskey && splitpos && lt) { /* If we stopped in the middle of a compressed node with * perfect match, and the condition is to seek a key "<" than * the specified one, then if this node is a key it already * represents our match. For instance we may have nodes: * * "f" -> "oobar" = 1 -> "" = 2 * * Representing keys "f" = 1, "foobar" = 2. A seek for * the key < "foo" will stop in the middle of the "oobar" * node, but will be our match, representing the key "f". * * So in that case, we don't seek backward. */ } else { if (gt && !raxIteratorNextStep(it, 0)) return 0; if (lt && !raxIteratorPrevStep(it, 0)) return 0; } it->flags |= RAX_ITER_JUST_SEEKED; /* Ignore next call. */ } } else { /* If we are here just eq was set but no match was found. */ it->flags |= RAX_ITER_EOF; return 1; } return 1; } /* Go to the next element in the scope of the iterator 'it'. * If EOF (or out of memory) is reached, 0 is returned, otherwise 1 is * returned. In case 0 is returned because of OOM, errno is set to ENOMEM. */ int raxNext(raxIterator *it) { if (!raxIteratorNextStep(it, 0)) { errno = ENOMEM; return 0; } if (it->flags & RAX_ITER_EOF) { errno = 0; return 0; } return 1; } /* Go to the previous element in the scope of the iterator 'it'. * If EOF (or out of memory) is reached, 0 is returned, otherwise 1 is * returned. In case 0 is returned because of OOM, errno is set to ENOMEM. */ int raxPrev(raxIterator *it) { if (!raxIteratorPrevStep(it, 0)) { errno = ENOMEM; return 0; } if (it->flags & RAX_ITER_EOF) { errno = 0; return 0; } return 1; } /* Perform a random walk starting in the current position of the iterator. * Return 0 if the tree is empty or on out of memory. Otherwise 1 is returned * and the iterator is set to the node reached after doing a random walk * of 'steps' steps. If the 'steps' argument is 0, the random walk is performed * using a random number of steps between 1 and two times the logarithm of * the number of elements. * * NOTE: if you use this function to generate random elements from the radix * tree, expect a disappointing distribution. A random walk produces good * random elements if the tree is not sparse, however in the case of a radix * tree certain keys will be reported much more often than others. At least * this function should be able to expore every possible element eventually. */ int raxRandomWalk(raxIterator *it, size_t steps) { if (it->rt->numele == 0) { it->flags |= RAX_ITER_EOF; return 0; } if (steps == 0) { size_t fle = floor(log(it->rt->numele)); fle *= 2; steps = 1 + rand() % fle; } raxNode *n = it->node; while (steps > 0 || !n->iskey) { int numchildren = n->iscompr ? 1 : n->size; int r = rand() % (numchildren + (n != it->rt->head)); if (r == numchildren) { /* Go up to parent. */ n = raxStackPop(&it->stack); int todel = n->iscompr ? n->size : 1; raxIteratorDelChars(it, todel); } else { /* Select a random child. */ if (n->iscompr) { if (!raxIteratorAddChars(it, n->data, n->size)) return 0; } else { if (!raxIteratorAddChars(it, n->data + r, 1)) return 0; } raxNode **cp = raxNodeFirstChildPtr(n) + r; if (!raxStackPush(&it->stack, n)) return 0; memcpy(&n, cp, sizeof(n)); } if (n->iskey) steps--; } it->node = n; return 1; } /* Compare the key currently pointed by the iterator to the specified * key according to the specified operator. Returns 1 if the comparison is * true, otherwise 0 is returned. */ int raxCompare(raxIterator *iter, const char *op, unsigned char *key, size_t key_len) { int eq = 0, lt = 0, gt = 0; if (op[0] == '=' || op[1] == '=') eq = 1; if (op[0] == '>') gt = 1; else if (op[0] == '<') lt = 1; else if (op[1] != '=') return 0; /* Syntax error. */ size_t minlen = key_len < iter->key_len ? key_len : iter->key_len; int cmp = memcmp(iter->key, key, minlen); /* Handle == */ if (lt == 0 && gt == 0) return cmp == 0 && key_len == iter->key_len; /* Handle >, >=, <, <= */ if (cmp == 0) { /* Same prefix: longer wins. */ if (eq && key_len == iter->key_len) return 1; else if (lt) return iter->key_len < key_len; else if (gt) return iter->key_len > key_len; } if (cmp > 0) { return gt ? 1 : 0; } else /* (cmp < 0) */ { return lt ? 1 : 0; } } /* Free the iterator. */ void raxStop(raxIterator *it) { if (it->key != it->key_static_string) rax_free(it->key); raxStackFree(&it->stack); } /* Return if the iterator is in an EOF state. This happens when raxSeek() * failed to seek an appropriate element, so that raxNext() or raxPrev() * will return zero, or when an EOF condition was reached while iterating * with raxNext() and raxPrev(). */ int raxEOF(raxIterator *it) { return it->flags & RAX_ITER_EOF; } /* Return the number of elements inside the radix tree. */ uint64_t raxSize(rax *rax) { return rax->numele; } /* ----------------------------- Introspection ------------------------------ */ /* This function is mostly used for debugging and learning purposes. * It shows an ASCII representation of a tree on standard output, outling * all the nodes and the contained keys. * * The representation is as follow: * * "foobar" (compressed node) * [abc] (normal node with three children) * [abc]=0x12345678 (node is a key, pointing to value 0x12345678) * [] (a normal empty node) * * Children are represented in new idented lines, each children prefixed by * the "`-(x)" string, where "x" is the edge byte. * * [abc] * `-(a) "ladin" * `-(b) [kj] * `-(c) [] * * However when a node has a single child the following representation * is used instead: * * [abc] -> "ladin" -> [] */ /* The actual implementation of raxShow(). */ void raxRecursiveShow(int level, int lpad, raxNode *n) { char s = n->iscompr ? '"' : '['; char e = n->iscompr ? '"' : ']'; int numchars = printf("%c%.*s%c", s, n->size, n->data, e); if (n->iskey) { numchars += printf("=%p", raxGetData(n)); } int numchildren = n->iscompr ? 1 : n->size; /* Note that 7 and 4 magic constants are the string length * of " `-(x) " and " -> " respectively. */ if (level) { lpad += (numchildren > 1) ? 7 : 4; if (numchildren == 1) lpad += numchars; } raxNode **cp = raxNodeFirstChildPtr(n); for (int i = 0; i < numchildren; i++) { char *branch = " `-(%c) "; if (numchildren > 1) { printf("\n"); for (int j = 0; j < lpad; j++) putchar(' '); printf(branch, n->data[i]); } else { printf(" -> "); } raxNode *child; memcpy(&child, cp, sizeof(child)); raxRecursiveShow(level + 1, lpad, child); cp++; } } /* Show a tree, as outlined in the comment above. */ void raxShow(rax *rax) { raxRecursiveShow(0, 0, rax->head); putchar('\n'); } /* Used by debugnode() macro to show info about a given node. */ void raxDebugShowNode(const char *msg, raxNode *n) { printf("%s: %p [%.*s] key:%d size:%d children:", msg, (void *) n, (int) n->size, (char *) n->data, n->iskey, n->size); int numcld = n->iscompr ? 1 : n->size; raxNode **cldptr = raxNodeLastChildPtr(n) - (numcld - 1); while (numcld--) { raxNode *child; memcpy(&child, cldptr, sizeof(child)); cldptr++; printf("%p ", (void *) child); } printf("\n"); fflush(stdout); }